diff --git a/Cargo.lock b/Cargo.lock index b5afcc81..10dc6157 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -273,7 +273,7 @@ dependencies = [ "anyhow", "bumpalo", "indexmap 2.2.6", - "rustc-hash", + "rustc-hash 1.1.0", "serde", "unicode-width", ] @@ -475,6 +475,7 @@ dependencies = [ "annotate-snippets", "anyhow", "derivative", + "drop_bomb", "hashbrown 0.14.5", "hi-doc", "jrsonnet-gcmodule", @@ -484,8 +485,9 @@ dependencies = [ "jrsonnet-types", "num-bigint", "pathdiff", - "rustc-hash", + "rustc-hash 2.0.0", "serde", + "smallvec", "stacker", "static_assertions", "strsim", @@ -533,7 +535,7 @@ version = "0.5.0-pre96" dependencies = [ "hashbrown 0.14.5", "jrsonnet-gcmodule", - "rustc-hash", + "rustc-hash 2.0.0", ] [[package]] @@ -551,10 +553,16 @@ version = "0.5.0-pre96" dependencies = [ "jrsonnet-gcmodule", "jrsonnet-interner", - "peg", + "jrsonnet-macros", + "jrsonnet-tokenizer", + "nom", "static_assertions", ] +[[package]] +name = "jrsonnet-pkg" +version = "0.5.0-pre96" + [[package]] name = "jrsonnet-rowan-parser" version = "0.5.0-pre96" @@ -581,7 +589,7 @@ dependencies = [ "md5", "num-bigint", "regex", - "rustc-hash", + "rustc-hash 2.0.0", "serde", "serde_json", "serde_yaml_with_quirks", @@ -590,12 +598,19 @@ dependencies = [ "sha3", ] +[[package]] +name = "jrsonnet-tokenizer" +version = "0.5.0-pre96" +dependencies = [ + "logos", + "nom", +] + [[package]] name = "jrsonnet-types" version = "0.5.0-pre96" dependencies = [ "jrsonnet-gcmodule", - "peg", ] [[package]] @@ -745,6 +760,22 @@ dependencies = [ "mimalloc-sys", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-bigint" version = "0.4.5" @@ -809,33 +840,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" -[[package]] -name = "peg" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a625d12ad770914cbf7eff6f9314c3ef803bfe364a1b20bc36ddf56673e71e5" -dependencies = [ - "peg-macros", - "peg-runtime", -] - -[[package]] -name = "peg-macros" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f241d42067ed3ab6a4fece1db720838e1418f36d868585a27931f95d6bc03582" -dependencies = [ - "peg-runtime", - "proc-macro2", - "quote", -] - -[[package]] -name = "peg-runtime" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3aeb8f54c078314c2065ee649a7241f46b9d8e418e1a9581ba0546657d7aa3a" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -964,7 +968,7 @@ dependencies = [ "countme", "hashbrown 0.14.5", "memoffset", - "rustc-hash", + "rustc-hash 1.1.0", "text-size", ] @@ -974,6 +978,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + [[package]] name = "rustix" version = "0.38.34" 
diff --git a/Cargo.toml b/Cargo.toml
index de0f47d3..95b3d13f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -57,7 +57,7 @@ sha3 = "0.10.8"
# Source code parsing.
# Jrsonnet has two parsers for jsonnet - one is for execution, and another is for better parsing diagnostics/lints/LSP.
# First (and fast one) is based on peg, second is based on rowan.
-peg = "0.8.3"
+nom = "7.1.3"
logos = "0.14.0"
ungrammar = "1.16.1"
rowan = "0.15.15"
@@ -69,7 +69,7 @@ tempfile = "3.10"
pathdiff = "0.2.1"
hashbrown = "0.14.5"
static_assertions = "1.1"
-rustc-hash = "1.1"
+rustc-hash = "2.0"
num-bigint = "0.4.5"
derivative = "2.2.0"
strsim = "0.11.0"
@@ -82,9 +82,6 @@ indexmap = "2.2.3"
itertools = "0.13.0"
xshell = "0.2.6"

-lsp-server = "0.7.6"
-lsp-types = "0.96.0"
-
regex = "1.10"
lru = "0.12.3"
diff --git a/cmds/jrsonnet/src/main.rs b/cmds/jrsonnet/src/main.rs
index 415b9435..4513e55e 100644
--- a/cmds/jrsonnet/src/main.rs
+++ b/cmds/jrsonnet/src/main.rs
@@ -1,6 +1,9 @@
+// #![cfg(feature = "nightly", feature(unix_sigpipe))]
+
use std::{
	fs::{create_dir_all, File},
	io::{Read, Write},
+	path::PathBuf,
};

use clap::{CommandFactory, Parser};
@@ -11,6 +14,7 @@ use jrsonnet_evaluator::{
	error::{Error as JrError, ErrorKind},
	ResultExt, State, Val,
};
+use jrsonnet_parser::Source;

#[cfg(feature = "mimalloc")]
#[global_allocator]
@@ -23,6 +27,11 @@ enum SubOpts {
		/// Target shell name
		shell: Shell,
	},
+	Analyze {
+		#[command(flatten)]
+		stdlib: StdOpts,
+		file: PathBuf,
+	},
}

#[derive(Parser)]
@@ -87,7 +96,9 @@ struct Opts {
	debug: DebugOpts,
}

-// TODO: Add unix_sigpipe = "sig_dfl"
+// // Do not panic on pipe failure: https://github.com/rust-lang/rust/issues/97889
+// #[cfg(feature = "nightly", unix_sigpipe = "sig_dfl")]
+// The feature was replaced with a compiler flag, which can't be feature-gated; thus it is recommended, but it is only enabled in nix-flake-based builds.
fn main() {
	let opts: Opts = Opts::parse();

@@ -105,6 +116,16 @@ fn main() {
			generate(shell, app, "jrsonnet", buf);
			std::process::exit(0)
		}
+		SubOpts::Analyze { file, stdlib } => {
+			let content = std::fs::read_to_string(file).expect("read file failed");
+			let source = Source::new_virtual("name".into(), (&content).into());
+			let ast =
+				jrsonnet_parser::parse(&content, &jrsonnet_parser::ParserSettings { source })
+					.expect("parsed");
+			let ctx = stdlib.context_initializer().expect("ctx");
+			jrsonnet_evaluator::analyze_root(State::default(), &ast, ctx);
+			std::process::exit(0)
+		}
	}
}
diff --git a/crates/jrsonnet-evaluator/Cargo.toml b/crates/jrsonnet-evaluator/Cargo.toml
index d0905cb7..4b69bd2d 100644
--- a/crates/jrsonnet-evaluator/Cargo.toml
+++ b/crates/jrsonnet-evaluator/Cargo.toml
@@ -61,3 +61,5 @@ hi-doc = { workspace = true, optional = true }
num-bigint = { workspace = true, features = ["serde"], optional = true }
derivative.workspace = true
stacker = "0.1.15"
+smallvec = "1.13.2"
+drop_bomb.workspace = true
diff --git a/crates/jrsonnet-evaluator/src/analyze.rs b/crates/jrsonnet-evaluator/src/analyze.rs
new file mode 100644
index 00000000..5a17d48b
--- /dev/null
+++ b/crates/jrsonnet-evaluator/src/analyze.rs
@@ -0,0 +1,882 @@
+// {
+//   a: $, // An equivalent of super, making field `a` object-dependent; we can't cache it per-object
+//   b: {
+//     a: $, // Field `a` is not object-dependent: object `b` itself is object-dependent, so the fields in it aren't bound to the top object.
+//           // This is because `b` itself will be created once per top-level object.
+//   },
+// }
+//
+// Same thing with locals.
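+// For example (an illustrative case, not from the original comment): in `{ local a = $, b: a }`,
+// field `b` becomes object-dependent through the local `a`, so it can't be cached per-object either;
+// a local defined inside a deeper nested object only makes values depend on that deeper object.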
+// Should $ be handled as local instead of this magic?
+
+use drop_bomb::DropBomb;
+use hi_doc::{Formatting, SnippetBuilder, Text};
+use jrsonnet_interner::IStr;
+use jrsonnet_parser::{
+	AssertStmt, BindSpec, Destruct, Expr, LiteralType, LocExpr, ObjBody, Param, SliceDesc, Source,
+	Span, Spanned,
+};
+use rustc_hash::{FxHashMap, FxHashSet};
+use smallvec::SmallVec;
+
+use crate::{ContextBuilder, ContextInitializer, State};
+
+#[derive(Debug, Clone, Copy)]
+#[must_use]
+struct AnalysisResult {
+	// Highest object on whose identity the value is dependent. u32::MAX = not dependent at all
+	object_dependent_depth: u32,
+	// Highest local on which this value depends. u32::MAX = not dependent at all
+	local_dependent_depth: u32,
+}
+impl AnalysisResult {
+	fn depend_on_object(&mut self, object: u32) -> bool {
+		if object < self.object_dependent_depth {
+			self.object_dependent_depth = object;
+			true
+		} else {
+			false
+		}
+	}
+	fn depend_on_local(&mut self, local: u32) -> bool {
+		if local < self.local_dependent_depth {
+			self.local_dependent_depth = local;
+			true
+		} else {
+			false
+		}
+	}
+	fn taint_by(&mut self, result: &AnalysisResult) -> bool {
+		self.depend_on_object(result.object_dependent_depth)
+	}
+}
+struct LocalDefinition {
+	name: Spanned<IStr>,
+	// At which tree depth this local was defined
+	defined_at_depth: u32,
+	/// Min depth at which this local was used. `u32::MAX` = not used at all.
+	/// This check won't catch unused argument closures, e.g.:
+	/// ```jsonnet
+	/// local
+	/// a = b,
+	/// b = a,
+	/// ; 2 + 2
+	///
+	/// ```
+	/// Both `a` and `b` here are "used", but the whole closure was not used here.
+	used_at_depth: u32,
+	/// Used as part of closure
+	/// TODO: Store indirect analysis separately
+	used_by_current_frame: bool,
+	// Analysis result for the value of this local
+	analysis: AnalysisResult,
+	// For sanity checking; locals are initialized in batches, use first_uninitialized_local
+	analyzed: bool,
+	// During a walk over uninitialized vars, we can't refer to analysis results of other locals,
+	// but we need to. To make that work, for each variable in the variable frame we capture its closure,
+	// by looking at referenced variables.
+	referened: bool,
+}
+impl LocalDefinition {
+	fn use_at(&mut self, depth: u32) {
+		if depth == self.defined_at_depth {
+			// TODO: Don't ignore self-uses, also see comment about indirect analysis
+			self.used_by_current_frame = true;
+			return;
+		}
+		self.used_at_depth = self.used_at_depth.min(depth);
+	}
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+struct LocalId(usize);
+impl LocalId {
+	fn defined_before(self, other: Self) -> bool {
+		self.0 < other.0
+	}
+	fn local_before(self) -> LocalId {
+		LocalId(self.0 - 1)
+	}
+}
+
+struct AnalysisStack {
+	local_defs: Vec<LocalDefinition>,
+	// Shadowing isn't used much in jsonnet; 2 because SmallVec can store 2 ptr-sized values inline without overhead.
+	// TODO: Add test for this assumption (sizeof(SmallVec<[usize; 1]>) == sizeof(SmallVec<[usize; 2]>))
+	local_by_name: FxHashMap<IStr, SmallVec<[LocalId; 2]>>,
+	// Locals in jsonnet are mutually recursive: some locals aren't analyzable before their dependencies are analyzed,
+	// and those dependencies may depend on the local itself. To fix this issue, uninitialized locals are not analyzed the same way as normal ones.
+	first_uninitialized_local: LocalId,
+
+	// How deep we have recursed into the expression tree.
+	depth: u32,
+	// Last depth at which an object has appeared. u32::MAX = not appeared at all
+	last_object_depth: u32,
+	// First depth at which an object has appeared. u32::MAX = not appeared at all
+	// $ refers to this object.
+	first_object_depth: u32,
+
+	result: hi_doc::SnippetBuilder,
+	errored: bool,
+}
+impl AnalysisStack {
+	fn new(src: &str) -> Self {
+		Self {
+			local_defs: vec![],
+			local_by_name: FxHashMap::default(),
+			first_uninitialized_local: LocalId(0),
+			depth: 0,
+			last_object_depth: u32::MAX,
+			first_object_depth: u32::MAX,
+			result: SnippetBuilder::new(src),
+			errored: false,
+		}
+	}
+	fn first_object(&self) -> u32 {
+		assert!(
+			self.first_object_depth != u32::MAX,
+			"$ used outside of object"
+		);
+		self.first_object_depth
+	}
+	fn last_object(&self) -> u32 {
+		assert!(
+			self.last_object_depth != u32::MAX,
+			"this/super used outside of object"
+		);
+		self.last_object_depth
+	}
+	fn local(&mut self, name: &IStr, span: Span) -> Option<(&mut LocalDefinition, bool)> {
+		let Some(local_id) = self.local_by_name.get(name) else {
+			self.result
+				.error(Text::single(
+					format!("local is not defined: {name}").chars(),
+					Formatting::default(),
+				))
+				.range(span.1 as usize..=(span.2 - 1) as usize)
+				.build();
+			self.errored = true;
+			return None;
+		};
+		let local_id = *local_id
+			.last()
+			.expect("local not defined (maybe defined deeper)");
+		Some((
+			&mut self.local_defs[local_id.0],
+			local_id.defined_before(self.first_uninitialized_local),
+		))
+	}
+	fn use_local(&mut self, name: &IStr, span: Span, taint: &mut AnalysisResult) {
+		let depth = self.depth;
+		let errored = self.errored;
+		let Some((local, initialized)) = self.local(name, span) else {
+			return;
+		};
+		local.use_at(depth);
+		if dbg!(initialized) {
+			// It is OK for errored output to violate this
+			if !errored {
+				assert!(
+					local.analyzed,
+					"sanity: initialized == true => analyzed == true, two markers should match for {name}"
+				);
+			}
+			taint.taint_by(&local.analysis);
+		} else {
+			println!("local referenced!");
+			local.referened = true;
+		}
+		taint.depend_on_local(local.defined_at_depth);
+	}
+	// TODO: It isn't exactly correct that this is the PendingUsage state; there should be yet another state,
+	// to prevent it from being used before closures are finished processing... Or maybe it should be done at
+	// the same time as `finish_local_initializations`?
+	fn local_uses_local(&mut self, local: LocalId, uses: &LocalId) -> bool {
+		dbg!(local, uses);
+		let mut changed = false;
+		let user_used_at_depth = self.local_defs[local.0].used_at_depth;
+
+		let uses = &mut self.local_defs[uses.0];
+		let defined_at_depth = uses.defined_at_depth;
+		let analysis = uses.analysis;
+
+		if dbg!(user_used_at_depth) < dbg!(uses.used_at_depth) {
+			changed = true;
+			uses.used_at_depth = user_used_at_depth;
+		}
+
+		let user = &mut self.local_defs[local.0];
+		assert_eq!(
+			user.defined_at_depth, defined_at_depth,
+			"local_uses_local is only intended to be used at the sibling locals"
+		);
+		// TODO: Store indirect analysis in a different field? Currently all indirect analysis is propagated via the
+		// analysis field
+		changed |= user.analysis.taint_by(&analysis);
+		changed |= user.analysis.depend_on_local(defined_at_depth);
+		changed
+	}
+	fn ensure_no_unitialized_locals(&self) {
+		assert_eq!(
+			self.first_uninitialized_local,
+			self.next_local_id(),
+			"unexpected uninitialized locals"
+		);
+	}
+	fn next_local_id(&self) -> LocalId {
+		LocalId(self.local_defs.len())
+	}
+	fn start_local_deconstructions(&self) -> PendingDeconstructions {
+		self.ensure_no_unitialized_locals();
+		PendingDeconstructions {
+			first_in_frame: self.next_local_id(),
+			bomb: DropBomb::new(
+				"after locals are defined, you need to pass PendingDeconstructions to finish_local_deconstructions",
+			),
+		}
+	}
+	/// # Panics
+	///
+	/// If the local is already defined
+	fn define_external_local(&mut self, name: IStr) {
+		self.ensure_no_unitialized_locals();
+		let next_local_id = self.next_local_id();
+		let found = self.local_by_name.entry(name.clone()).or_default();
+		// Empty by-names are preserved
+		if let Some(id) = found.last() {
+			panic!("external locals should not be redefined");
+		};
+		found.push(next_local_id);
+		self.local_defs.push(LocalDefinition {
+			name: Spanned(
+				name,
+				Span(Source::new_virtual("UNNAMED".into(), "".into()), 0, 0),
+			),
+			defined_at_depth: 0,
+			analysis: AnalysisResult::default(),
+			used_at_depth: 0,
+			analyzed: false,
+			referened: false,
+			used_by_current_frame: false,
+		});
+		// External local is always initialized
+		self.first_uninitialized_local = self.next_local_id();
+		eprintln!("First uninit = {:?}", self.first_uninitialized_local);
+	}
+	#[must_use]
+	fn define_local(&mut self, dupe: &PendingDeconstructions, name: Spanned<IStr>) -> Option<()> {
+		let next_local_id = self.next_local_id();
+		let found = self.local_by_name.entry(name.0.clone()).or_default();
+		// Empty by-names are preserved
+		if let Some(id) = found.last() {
+			if !id.defined_before(dupe.first_in_frame) {
+				self.result
+					.error(Text::single(
+						format!("variable redeclared: {}", name.0).chars(),
+						Formatting::default(),
+					))
+					.range(name.1.range())
+					.build();
+				return None;
+			}
+		};
+		found.push(next_local_id);
+		self.local_defs.push(LocalDefinition {
+			name,
+			defined_at_depth: self.depth,
+			analysis: AnalysisResult::default(),
+			used_at_depth: u32::MAX,
+			analyzed: false,
+			referened: false,
+			used_by_current_frame: false,
+		});
+		Some(())
+	}
+	fn finish_local_deconstructions(
+		&mut self,
+		PendingDeconstructions {
+			first_in_frame,
+			mut bomb,
+		}: PendingDeconstructions,
+	) -> PendingInitialization {
+		bomb.defuse();
+		for ele in &self.local_defs[first_in_frame.0..] {
+			assert_eq!(
+				ele.defined_at_depth, self.depth,
+				"sanity: depth was changed during deconstructions"
+			);
+			assert_eq!(
+				ele.used_at_depth,
+				u32::MAX,
+				"should not use locals before deconstructions finished"
+			);
+		}
+		let first_after_frame = self.next_local_id();
+		assert_ne!(
+			first_in_frame, first_after_frame,
+			"no locals were defined during deconstruction"
+		);
+		PendingInitialization {
+			first_in_frame,
+			first_after_frame,
+			bomb: DropBomb::new(
+				"after you are done with initialization, pass to finish_local_initializations",
+			),
+		}
+	}
+	fn initialize_local(
+		&mut self,
+		pending: &PendingInitialization,
+		id: LocalId,
+		analysis: AnalysisResult,
+		taint: &mut AnalysisResult,
+	) {
+		let local = &mut self.local_defs[id.0];
+		assert!(!local.analyzed, "sanity: already initialized");
+		pending.ensure_pending(id);
+		taint.taint_by(&analysis);
+		local.analysis = analysis;
+
+		local.analyzed = true;
+	}
+	fn finish_local_initializations(
+		&mut self,
+		PendingInitialization {
+			first_in_frame,
+			first_after_frame,
+			mut bomb,
+		}: PendingInitialization,
+	) -> PendingUsage {
+		bomb.defuse();
+		assert_eq!(
+			first_after_frame,
+			self.next_local_id(),
+			"during local initialization, there were unfinished locals"
+		);
+		self.first_uninitialized_local = self.next_local_id();
+		eprintln!("First uninit = {:?}", self.first_uninitialized_local);
+
+		for ele in &self.local_defs[first_in_frame.0..first_after_frame.0] {
+			assert!(ele.analyzed, "sanity: not initialized");
+			assert!(
+				!ele.referened,
+				"sanity: referenced flag was not reset, local closure isn't fully captured"
+			);
+		}
+
+		PendingUsage {
+			first_in_frame,
+			first_after_frame,
+			bomb: DropBomb::new("after you are done with usage, pass to finish_local_usages"),
+		}
+	}
+	fn finish_local_usages(
+		&mut self,
+		closures: &Closures,
+		PendingUsage {
+			first_in_frame,
+			first_after_frame,
+			mut bomb,
+		}: PendingUsage,
+	) {
+		bomb.defuse();
+		self.ensure_no_unitialized_locals();
+		assert_eq!(
+			first_after_frame,
+			self.next_local_id(),
+			"unfinished locals stack found"
+		);
+
+		{
+			// FIXME: It should only handle local uses (used_at); data about the locals themselves should be processed
+			// before handle_inside
+			let mut changed = true;
+			while changed {
+				changed = false;
+				closures.process(|closure| {
+					for uses in closure.references_locals {
+						changed |= self.local_uses_local(closure.local, uses);
+					}
+				});
+			}
+		}
+
+		let mut expected_idx = first_after_frame;
+		for ele in self.local_defs.drain(first_in_frame.0..).rev() {
+			expected_idx = expected_idx.local_before();
+			let id = self
+				.local_by_name
+				.get_mut(&ele.name.0)
+				.expect("exists")
+				.pop()
+				.expect("exists");
+			assert_eq!(id, expected_idx, "sanity: by name map correctness");
+			assert!(
+				ele.used_at_depth >= self.depth,
+				"sanity: lower expression can't reach upper"
+			);
+			assert_eq!(ele.defined_at_depth, self.depth, "sanity: depth was not decreased/decreased too much after finishing working with locals");
+			assert!(ele.analyzed);
+			if ele.used_at_depth == u32::MAX {
+				self.result
+					.warning(Text::single(
+						format!("local was not used: {}", ele.name.0).chars(),
+						Formatting::default(),
+					))
+					.range(ele.name.1.range())
+					.build();
+			}
+			if dbg!(ele.used_at_depth) == dbg!(ele.defined_at_depth) {
+				self.result
+					.warning(Text::single(
+						format!(
+							"local was not used (only in closure, which wasn't referenced): {0}",
+							ele.name.0
+						)
+						.chars(),
+						Formatting::default(),
+					))
+					.range(ele.name.1.range())
+					.build();
+			}
+			if ele.analysis.local_dependent_depth < ele.defined_at_depth
+				|| ele.analysis.object_dependent_depth < ele.defined_at_depth
+			{
+				self.result
+					.warning(Text::single(
+						format!(
+							"local is only using items from parent scope, move it higher: {}",
+							ele.name.0
+						)
+						.chars(),
+						Formatting::default(),
+					))
+					.range(ele.name.1.range())
+					.build();
+			}
+		}
+		self.first_uninitialized_local = first_in_frame;
+		eprintln!("First uninit = {:?}", self.first_uninitialized_local);
+	}
+}
+
+struct PendingDeconstructions {
+	first_in_frame: LocalId,
+	bomb: DropBomb,
+}
+impl PendingDeconstructions {
+	fn abandon(mut self) {
+		self.bomb.defuse();
+	}
+}
+struct PendingInitialization {
+	first_in_frame: LocalId,
+	first_after_frame: LocalId,
+	bomb: DropBomb,
+}
+impl PendingInitialization {
+	fn ensure_pending(&self, local: LocalId) {
+		assert!(
+			local.defined_before(self.first_after_frame)
+				&& !local.defined_before(self.first_in_frame),
+			"sanity: expected to be pending"
+		);
+	}
+	fn indexes(&self) -> impl Iterator<Item = LocalId> {
+		(self.first_in_frame.0..self.first_after_frame.0).map(LocalId)
+	}
+}
+struct PendingUsage {
+	first_in_frame: LocalId,
+	first_after_frame: LocalId,
+	bomb: DropBomb,
+}
+
+#[allow(clippy::too_many_lines)]
+fn analyze(expr: &LocExpr, stack: &mut AnalysisStack) -> AnalysisResult {
+	let mut res = AnalysisResult::default();
+	let span = expr.span();
+	match expr.expr() {
+		// Locals
+		Expr::ArrComp(elem, comp) => {
+			todo!("FORSPEC WORKS AS LOCAL");
+		}
+		Expr::LocalExpr(l, v) => return analyze_local(&l, stack, |stack| analyze(v, stack)),
+
+		// Objects
+		Expr::Obj(obj) => return analyze_object(obj, stack),
+
+		// Dependencies
+		Expr::Var(v) => {
+			stack.use_local(v, span, &mut res);
+		}
+		Expr::Literal(l) => match l {
+			LiteralType::This | LiteralType::Super => {
+				res.depend_on_object(stack.last_object());
+			}
+			LiteralType::Dollar => {
+				res.depend_on_object(stack.first_object());
+			}
+			LiteralType::Null | LiteralType::True | LiteralType::False => {}
+		},
+
+		// Boring
+		Expr::Str(_) => {}
+		Expr::Num(_) => {}
+		Expr::Arr(a) => {
+			for elem in a {
+				let elem_res = analyze(elem, stack);
+				res.taint_by(&elem_res);
+			}
+		}
+		Expr::UnaryOp(_, value) => {
+			res.taint_by(&analyze(value, stack));
+		}
+		Expr::BinaryOp(left, _, right) => {
+			res.taint_by(&analyze(left, stack));
+			res.taint_by(&analyze(right, stack));
+		}
+		Expr::AssertExpr(AssertStmt(cond, message), rest) => {
+			res.taint_by(&analyze(cond, stack));
+			if let Some(message) = message {
+				res.taint_by(&analyze(message, stack));
+			}
+			res.taint_by(&analyze(rest, stack));
+		}
+		Expr::Import(v) | Expr::ImportStr(v) | Expr::ImportBin(v) => {
+			assert!(
+				matches!(v.expr(), Expr::Str(_)),
+				"import with non-string expression is not allowed"
+			);
+		}
+		Expr::ErrorStmt(e) => {
+			res.taint_by(&analyze(e, stack));
+		}
+		Expr::Apply(applicable, args, _) => {
+			res.taint_by(&analyze(applicable, stack));
+			for arg in &args.unnamed {
+				res.taint_by(&analyze(arg, stack));
+			}
+			let mut passed = FxHashSet::default();
+			for (name, arg) in &args.named {
+				assert!(passed.insert(name), "argument was passed twice: {name}");
+				res.taint_by(&analyze(arg, stack));
+			}
+		}
+		Expr::Function(_, _) => todo!(),
+		Expr::IfElse {
+			cond,
+			cond_then,
+			cond_else,
+		} => {
+			res.taint_by(&analyze(&cond.0, stack));
+			res.taint_by(&analyze(cond_then, stack));
+			if let Some(cond_else) = cond_else {
+				res.taint_by(&analyze(cond_else, stack));
+			}
+		}
+		Expr::Slice(expr, SliceDesc { start, end, step }) => {
+			res.taint_by(&analyze(expr, stack));
+			if let Some(start) = &start {
+				res.taint_by(&analyze(start, stack));
+			}
+			if let Some(end) = &end {
+				res.taint_by(&analyze(end, stack));
+			}
+			if let Some(step) = &step {
+				res.taint_by(&analyze(step, stack));
+			}
+		}
+		Expr::Index { indexable, parts } => {
+			res.taint_by(&analyze(indexable, stack));
+			for ele in parts {
+				res.taint_by(&analyze(&ele.value, stack));
+			}
+		}
+	}
+	res
+}
+fn analyze_object(obj: &ObjBody, stack: &mut AnalysisStack) -> AnalysisResult {
+	todo!()
+}
+
+#[must_use]
+fn process_destruct(
+	bind: &Destruct,
+	stack: &mut AnalysisStack,
+	dupe: &PendingDeconstructions,
+) -> Option<()> {
+	match bind {
+		Destruct::Full(f) => stack.define_local(dupe, f.clone()),
+	}
+}
+trait Local {
+	fn destruct(&self) -> &Destruct;
+	fn initialize(
+		&self,
+		stack: &mut AnalysisStack,
+		dupe: &PendingInitialization,
+		ids: &mut impl Iterator<Item = LocalId>,
+		taint: &mut AnalysisResult,
+	) -> Option<()>;
+}
+fn initialize_destruct_from_result(
+	destruct: &Destruct,
+	result: AnalysisResult,
+
+	stack: &mut AnalysisStack,
+	dupe: &PendingInitialization,
+	ids: &mut impl Iterator<Item = LocalId>,
+	taint: &mut AnalysisResult,
+) {
+	match destruct {
+		Destruct::Full(_) => {
+			stack.initialize_local(dupe, ids.next().expect("not finished yet"), result, taint);
+		}
+	}
+}
+impl Local for BindSpec {
+	fn destruct(&self) -> &Destruct {
+		match &self {
+			Self::Field { into, value: _ } => into,
+			Self::Function {
+				name,
+				params: _,
+				value: _,
+			} => name,
+		}
+	}
+
+	fn initialize(
+		&self,
+		stack: &mut AnalysisStack,
+		dupe: &PendingInitialization,
+		ids: &mut impl Iterator<Item = LocalId>,
+		taint: &mut AnalysisResult,
+	) -> Option<()> {
+		match &self {
+			Self::Field { into, value } => {
+				let res = analyze(value, stack);
+				initialize_destruct_from_result(into, res, stack, dupe, ids, taint);
+			}
+			Self::Function {
+				name,
+				params,
+				value,
+			} => {
+				let res = analyze_local(&params.0, stack, |stack| analyze(value, stack));
+				initialize_destruct_from_result(name, res, stack, dupe, ids, taint);
+			}
+		};
+		Some(())
+	}
+}
+impl Local for Param {
+	fn destruct(&self) -> &Destruct {
+		&self.0
+	}
+
+	fn initialize(
+		&self,
+		stack: &mut AnalysisStack,
+		dupe: &PendingInitialization,
+		ids: &mut impl Iterator<Item = LocalId>,
+		taint: &mut AnalysisResult,
+	) -> Option<()> {
+		let res = self
+			.1
+			.as_ref()
+			.map(|e| analyze(e, stack))
+			.unwrap_or_default();
+		initialize_destruct_from_result(&self.0, res, stack, dupe, ids, taint);
+		Some(())
+	}
+}
+
+#[allow(clippy::struct_field_names)]
+struct Closures {
+	/// All the referenced locals, maybe repeated multiple times.
+	/// It is recorded as a contiguous vec of sets, i.e. we have
+	/// a = 1, 2, 3
+	/// b = 3, 4, 5, 6
+	/// And in `referenced` we have `[ 1, 2, 3, 3, 4, 5, 6 ]`. To find out which closure refers to which element, see `closures`...
+	referenced: Vec<LocalId>,
+
+	/// Number of elements per closure; for the above case it is a = 3, b = 4, so here
+	/// lies `[ 3, 4 ]`
+	/// ~~closures: Vec<usize>,~~
+	/// Finally, we have destructs.
+	/// Because a single destruct references a single closure but destructures to multiple locals, we have an even more complicated structure.
+	/// Luckily, destructs are not interleaved with each other, so here we can have the full list...
+	/// Imagine having (LocalId(20), LocalId(21)): we need to save it to the Map, but we know that the numbers are sequential, so here we store the number of consecutive elements
+	/// for each destruct, starting from `first_destruct_local`
+	/// ~~destructs: Vec<usize>,~~
+	///
+	/// => two of those fields were merged, as there is currently no per-destruct tracking of closures.
+	closures_destructs: Vec<(usize, usize)>,
+
+	/// This is not a doc comment for this field, just a continuation of the docs for the previous fields.
+	/// Having
+	/// ```jsonnet
+	/// local
+	/// [a, b, c] = [d, e, f],
+	/// [d, e, f] = [a, b, c, h],
+	/// h = 1,
+	/// ;
+	/// ```
+	///
+	/// We have a total of 7 locals
+	/// First local here is `a` => `first_destruct_local` = `a`
+	/// For the first closure `[a, b, c] = [d, e, f]` we have 3 referenced locals = [d, e, f] => `referenced += [d, e, f]`, `closures += 3`; 3 destructs = [a, b, c] => `destructs += 3`
+	/// [d, e, f] = [a, b, c, h], => `referenced += [a, b, c, h]`, `closures += 4`, `destructs += 3` (Note that this destruct will fail at runtime;
+	/// this code should not care about that, it only captures what the values are referencing)
+	/// h = 1 => referenced += [], closures += 0, destructs += 1
+	/// And the result is
+	///
+	/// ```text
+	/// Closures {
+	/// referenced: vec![d, e, f, a, b, c, h]
+	/// closures: vec![3, 4, 0],
+	/// destructs: vec![3, 3, 1],
+	/// first_destruct_local: a,
+	/// }
+	/// ```
+	///
+	/// Reconstruction of that:
+	///
+	/// We know that we start with a
+	/// We get the first number from destructs: `destructs.shift() == 3` => `destructs = [3, 1]`
+	/// 3 elements counting from a => [a, b, c]
+	/// Then we take the first number from closures: `closures.shift() == 3` => `closures = [4, 0]`
+	/// Then we take 3 items from referenced: `referenced.shift()x3 == d, e, f` => `referenced = [a, b, c, h]`
+	///
+	/// Thus we have [a, b, c] = [d, e, f]
+	///
+	/// ~~TODO: Merge closures and destructs? I don't think I'm interested in closure-per-destruct, but it is possible to implement.~~ - merged
+	first_destruct_local: LocalId,
+}
+impl Closures {
+	fn new(first_local: LocalId) -> Self {
+		Self {
+			first_destruct_local: first_local,
+			closures_destructs: vec![],
+			referenced: vec![],
+		}
+	}
+	fn process(&self, mut handle: impl FnMut(Closure<'_>)) {
+		let mut referenced = self.referenced.as_slice();
+		let mut current_local = self.first_destruct_local;
+		for (closures, destructs) in self.closures_destructs.iter().copied() {
+			let (this_referenced, next_referenced) = referenced.split_at(closures);
+			for _ in 0..destructs {
+				handle(Closure {
+					local: current_local,
+					references_locals: this_referenced,
+				});
+				current_local.0 += 1;
+			}
+			referenced = next_referenced;
+		}
+	}
+}
+struct Closure<'i> {
+	local: LocalId,
+	references_locals: &'i [LocalId],
+}
+
+fn analyze_local<T: Local>(
+	specs: &[T],
+	stack: &mut AnalysisStack,
+	handle_inside: impl FnOnce(&mut AnalysisStack) -> AnalysisResult,
+) -> AnalysisResult {
+	let pending_decon = stack.start_local_deconstructions();
+
+	let mut had_errors = false;
+	for local in specs {
+		if process_destruct(local.destruct(), stack, &pending_decon).is_none() {
+			had_errors = true;
+		}
+	}
+	// Can't continue after failed destructuring, as some local ids were not allocated.
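+	// (Illustrative, not from the original comment: in `local a = 1, a = 2; a`, the second `a`
+	// hits the "variable redeclared" error in define_local and never receives a LocalId, so the
+	// spec/id iterators below would misalign if analysis continued.)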
+ if had_errors { + pending_decon.abandon(); + return AnalysisResult::default(); + } + + let pending_init = stack.finish_local_deconstructions(pending_decon); + + let mut res = AnalysisResult::default(); + + let mut ids = pending_init.indexes(); + + let mut closures = Closures::new( + pending_init + .indexes() + .next() + .expect("empty local blocks are forbidden"), + ); + + for spec in specs { + let mut destructs = 0; + spec.initialize( + stack, + &pending_init, + &mut (&mut ids).inspect(|_| { + destructs += 1; + }), + &mut res, + ); + + let referenced_before = closures.referenced.len(); + for may_referenced_id in pending_init.indexes() { + let may_referenced = &mut stack.local_defs[may_referenced_id.0]; + if may_referenced.referened { + closures.referenced.push(may_referenced_id); + } + may_referenced.referened = false; + } + let referenced_after = closures.referenced.len(); + + closures + .closures_destructs + .push((referenced_after - referenced_before, destructs)); + } + + assert!( + ids.next().is_none() || stack.errored, + "locals uninitialized!" + ); + + let pending_usage = stack.finish_local_initializations(pending_init); + + stack.depth += 1; + + let inner_res = handle_inside(stack); + res.taint_by(&inner_res); + + stack.depth -= 1; + + stack.finish_local_usages(&closures, pending_usage); + + res +} + +impl Default for AnalysisResult { + fn default() -> Self { + Self { + object_dependent_depth: u32::MAX, + local_dependent_depth: u32::MAX, + } + } +} + +pub fn analyze_root(state: State, expr: &LocExpr, ctx: impl ContextInitializer) { + let mut builder = ContextBuilder::new(state); + ctx.populate(expr.span().0, &mut builder); + let mut stack = AnalysisStack::new(expr.span().0.code()); + for binding in builder.binding_list_for_analysis() { + stack.define_external_local(binding); + } + let _ = analyze(expr, &mut stack); + let source = hi_doc::source_to_ansi(&stack.result.build()); + println!("{source}"); +} diff --git a/crates/jrsonnet-evaluator/src/ctx.rs b/crates/jrsonnet-evaluator/src/ctx.rs index ab4b81cd..155f61a8 100644 --- a/crates/jrsonnet-evaluator/src/ctx.rs +++ b/crates/jrsonnet-evaluator/src/ctx.rs @@ -180,4 +180,7 @@ impl ContextBuilder { })) } } + pub(crate) fn binding_list_for_analysis(self) -> impl Iterator { + self.bindings.0.into_keys() + } } diff --git a/crates/jrsonnet-evaluator/src/evaluate/destructure.rs b/crates/jrsonnet-evaluator/src/evaluate/destructure.rs index 73284792..2dc83e76 100644 --- a/crates/jrsonnet-evaluator/src/evaluate/destructure.rs +++ b/crates/jrsonnet-evaluator/src/evaluate/destructure.rs @@ -21,9 +21,9 @@ pub fn destruct( ) -> Result<()> { match d { Destruct::Full(v) => { - let old = new_bindings.insert(v.clone(), parent); + let old = new_bindings.insert(v.0.clone(), parent); if old.is_some() { - bail!(DuplicateLocalVar(v.clone())) + bail!(DuplicateLocalVar(v.0.clone())) } } #[cfg(feature = "exp-destruct")] @@ -301,17 +301,23 @@ pub fn evaluate_dest( } } + #[allow(irrefutable_let_patterns)] + let Destruct::Full(name) = name + else { + panic!("parser will not allow destruct other than Full for function"); + }; + let old = new_bindings.insert( - name.clone(), + name.0.clone(), Thunk::new(MethodThunk { fctx, - name: name.clone(), + name: name.0.clone(), params: params.clone(), value: value.clone(), }), ); if old.is_some() { - bail!(DuplicateLocalVar(name.clone())) + bail!(DuplicateLocalVar(name.0.clone())) } } } diff --git a/crates/jrsonnet-evaluator/src/evaluate/mod.rs b/crates/jrsonnet-evaluator/src/evaluate/mod.rs index 350def58..310ff516 
100644 --- a/crates/jrsonnet-evaluator/src/evaluate/mod.rs +++ b/crates/jrsonnet-evaluator/src/evaluate/mod.rs @@ -51,7 +51,6 @@ pub fn evaluate_trivial(expr: &LocExpr) -> Option { | Expr::Num(_) | Expr::Literal(LiteralType::False | LiteralType::True | LiteralType::Null) => true, Expr::Arr(a) => a.iter().all(is_trivial), - Expr::Parened(e) => is_trivial(e), _ => false, } } @@ -74,7 +73,6 @@ pub fn evaluate_trivial(expr: &LocExpr) -> Option { .collect(), )) } - Expr::Parened(e) => evaluate_trivial(e)?, _ => return None, }) } @@ -457,7 +455,6 @@ pub fn evaluate(ctx: Context, expr: &LocExpr) -> Result { Literal(LiteralType::True) => Val::Bool(true), Literal(LiteralType::False) => Val::Bool(false), Literal(LiteralType::Null) => Val::Null, - Parened(e) => evaluate(ctx, e)?, Str(v) => Val::string(v.clone()), Num(v) => Val::try_num(*v)?, // I have tried to remove special behavior from super by implementing standalone-super @@ -651,10 +648,6 @@ pub fn evaluate(ctx: Context, expr: &LocExpr) -> Result { Val::Arr(ArrValue::lazy(out)) } Obj(body) => Val::Obj(evaluate_object(ctx, body)?), - ObjExtend(a, b) => evaluate_add_op( - &evaluate(ctx.clone(), a)?, - &Val::Obj(evaluate_object(ctx, b)?), - )?, Apply(value, args, tailstrict) => ensure_sufficient_stack(|| { evaluate_apply(ctx, value, args, CallLocation::new(&loc), *tailstrict) })?, diff --git a/crates/jrsonnet-evaluator/src/evaluate/operator.rs b/crates/jrsonnet-evaluator/src/evaluate/operator.rs index 4988d219..025ef013 100644 --- a/crates/jrsonnet-evaluator/src/evaluate/operator.rs +++ b/crates/jrsonnet-evaluator/src/evaluate/operator.rs @@ -121,7 +121,7 @@ pub fn evaluate_binary_op_normal(a: &Val, op: BinaryOpType, b: &Val) -> Result evaluate_add_op(a, b)?, + (a, Add | ObjectApply, b) => evaluate_add_op(a, b)?, (a, Eq, b) => Bool(equals(a, b)?), (a, Neq, b) => Bool(!equals(a, b)?), diff --git a/crates/jrsonnet-evaluator/src/evaluate/tcvm.rs b/crates/jrsonnet-evaluator/src/evaluate/tcvm.rs new file mode 100644 index 00000000..6f9a28df --- /dev/null +++ b/crates/jrsonnet-evaluator/src/evaluate/tcvm.rs @@ -0,0 +1,349 @@ +use core::{fmt, panic}; +use std::{marker::PhantomData, task::Poll}; + +use jrsonnet_parser::{ArgsDesc, IStr, LocExpr}; + +use crate::{ + bail, + error::{ErrorKind::*, Result}, + function::CallLocation, + Context, Val, +}; + +pub enum ApplyTCO<'a> { + Eval { + in_expr: Tag<&'a LocExpr>, + in_ctx: Tag, + out_val: Tag, + }, + Apply { + in_ctx: Tag, + in_value: Tag, + in_args: &'a ArgsDesc, + in_tailstrict: bool, + out_val: Tag, + }, + PopFrame, + PushFrame { + tag: Tag, + }, +} +enum FrameTCO { + FunctionCall { name: Tag }, +} +impl FrameTCO { + fn to_string(self, vm: &mut TcVM) -> String { + match self { + FrameTCO::FunctionCall { name } => { + let name = vm.strs.pop(name); + format!("function <{name}> call") + } + } + } +} + +pub struct TcVM<'e> { + apply: Fifo>, + exprs: Fifo<&'e LocExpr>, + vals: Fifo, + ctxs: Fifo, + strs: Fifo, + frames: Fifo, + #[cfg(debug_assertions)] + pub(crate) vals_offset: usize, + #[cfg(debug_assertions)] + pub(crate) ctxs_offset: usize, + pub(crate) apply_offset: usize, + active_frames: Vec, + + init_val: Tag, +} +impl<'e> TcVM<'e> { + pub fn root(ctx: Context, expr: &'e LocExpr) -> Self { + let init_ctx = ctx_tag("init"); + let init_val = val_tag("init"); + let init_expr = expr_tag("expr"); + Self { + exprs: Fifo::single(1, expr, init_expr), + + vals: Fifo::::with_capacity(1), + ctxs: Fifo::single(1, ctx, init_ctx), + apply: Fifo::single( + 1, + ApplyTCO::Eval { + in_expr: init_expr, + in_ctx: 
init_ctx, + out_val: init_val, + }, + apply_tag(), + ), + strs: Fifo::with_capacity(0), + frames: Fifo::with_capacity(0), + apply_offset: 0, + #[cfg(debug_assertions)] + ctxs_offset: 0, + #[cfg(debug_assertions)] + vals_offset: 0, + active_frames: vec![], + + init_val, + } + } + fn has_apply(&self) -> bool { + self.apply.len() > self.apply_offset + } + pub fn apply(&mut self, apply: ApplyTCO<'e>) { + self.apply.push(apply, apply_tag()) + } + pub fn poll(&mut self) -> Poll> { + use ApplyTCO::*; + if !self.has_apply() { + panic!("ready tcvm shouldn't be polled again"); + } + let op = self.apply.pop(apply_tag()); + + match op { + Eval { + in_expr, + in_ctx, + out_val, + } => super::evaluate_inner(self, in_expr, in_ctx, out_val)?, + + Apply { + in_ctx, + in_value, + in_args, + in_tailstrict, + out_val, + } => { + let value = self.vals.pop(in_value); + let ctx = self.ctxs.pop(in_ctx); + match value { + Val::Func(f) => { + self.vals.push( + f.evaluate(ctx, CallLocation::native(), in_args, in_tailstrict)?, + out_val, + ); + } + v => { + return Poll::Ready(Err(OnlyFunctionsCanBeCalledGot(v.value_type()).into())) + } + } + } + PopFrame => { + self.active_frames.pop(); + } + PushFrame { tag } => { + let frame = self.frames.pop(tag); + self.active_frames.push(frame); + } + } + if self.has_apply() { + Poll::Pending + } else { + Poll::Ready(Ok(self.vals.pop(self.init_val))) + } + } +} + +pub(crate) struct Fifo { + data: Vec<(T, Tag)>, +} +impl Fifo { + pub fn with_capacity(cap: usize) -> Self { + Self { + data: Vec::with_capacity(cap), + } + } + pub fn single(cap: usize, data: T, tag: Tag) -> Self { + // eprintln!(">>> {}", tag.0); + let mut out = Self { + data: Vec::with_capacity(cap), + }; + out.push(data, tag); + out + } + pub(crate) fn push(&mut self, data: T, tag: Tag) { + // eprintln!(">>> {}", tag.0); + self.data.push((data, tag)); + } + #[track_caller] + pub(crate) fn pop(&mut self, tag: Tag) -> T { + // eprintln!("<<< {}", tag.0); + let (data, stag) = self + .data + .pop() + .unwrap_or_else(|| panic!("underflow querying for {tag:?}")); + // debug_assert doesn't work here, as it always requires PartialEq + #[cfg(debug_assertions)] + assert_eq!( + stag, tag, + "mismatched expected {tag:?} and actual {stag:?} tags", + ); + data + } + pub(crate) fn is_empty(&self) -> bool { + self.data.is_empty() + } + pub(crate) fn len(&self) -> usize { + self.data.len() + } + pub(crate) fn reserve(&mut self, size: usize) { + self.data.reserve(size) + } +} + +pub(crate) struct Tag { + #[cfg(debug_assertions)] + name: &'static str, + #[cfg(debug_assertions)] + id: u64, + _marker: PhantomData, +} +#[inline(always)] +pub(crate) fn apply_tag<'e>() -> Tag> { + #[cfg(debug_assertions)] + { + Tag { + name: "APPLY", + id: 0, + _marker: PhantomData, + } + } + #[cfg(not(debug_assertions))] + { + Tag { + _marker: PhantomData, + } + } +} +#[inline(always)] +pub(crate) fn expr_tag<'a>(name: &'static str) -> Tag<&'a LocExpr> { + #[cfg(debug_assertions)] + { + Tag { + name, + id: 0, + _marker: PhantomData, + } + } + #[cfg(not(debug_assertions))] + { + Tag { + _marker: PhantomData, + } + } +} +#[inline(always)] +pub(crate) fn val_tag(name: &'static str) -> Tag { + #[cfg(debug_assertions)] + { + Tag { + name, + id: 0, + _marker: PhantomData, + } + } + #[cfg(not(debug_assertions))] + { + Tag { + _marker: PhantomData, + } + } +} +#[inline(always)] +pub(crate) fn ctx_tag(name: &'static str) -> Tag { + #[cfg(debug_assertions)] + { + Tag { + name, + id: 0, + _marker: PhantomData, + } + } + #[cfg(not(debug_assertions))] + { + Tag { + 
_marker: PhantomData, + } + } +} +pub(crate) fn str_tag(name: &'static str) -> Tag { + #[cfg(debug_assertions)] + { + Tag { + name, + id: 0, + _marker: PhantomData, + } + } + #[cfg(not(debug_assertions))] + { + Tag { + _marker: PhantomData, + } + } +} +#[cfg(debug_assertions)] +impl PartialEq for Tag { + fn eq(&self, other: &Self) -> bool { + self.name == other.name && self.id == other.id + } +} +impl fmt::Debug for Tag { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[cfg(debug_assertions)] + { + write!(f, "Tag({})", self.name) + } + #[cfg(not(debug_assertions))] + { + write!(f, "UncheckedTag") + } + } +} +impl Clone for Tag { + fn clone(&self) -> Self { + Self { + #[cfg(debug_assertions)] + name: self.name, + #[cfg(debug_assertions)] + id: self.id.clone(), + _marker: self._marker.clone(), + } + } +} +impl Copy for Tag {} + +pub trait Pop { + fn pop(&mut self, tag: Tag) -> T; + fn push(&mut self, tag: Tag, value: T); +} + +impl Pop for TcVM<'_> { + fn pop(&mut self, tag: Tag) -> Context { + self.ctxs.pop(tag) + } + + fn push(&mut self, tag: Tag, value: Context) { + self.ctxs.push(value, tag) + } +} +impl Pop for TcVM<'_> { + fn pop(&mut self, tag: Tag) -> Val { + self.vals.pop(tag) + } + + fn push(&mut self, tag: Tag, value: Val) { + self.vals.push(value, tag) + } +} +impl<'e> Pop<&'e LocExpr> for TcVM<'e> { + fn pop(&mut self, tag: Tag<&'e LocExpr>) -> &'e LocExpr { + self.exprs.pop(tag) + } + + fn push(&mut self, tag: Tag<&'e LocExpr>, value: &'e LocExpr) { + self.exprs.push(value, tag) + } +} diff --git a/crates/jrsonnet-evaluator/src/function/mod.rs b/crates/jrsonnet-evaluator/src/function/mod.rs index b3ac9e89..22204576 100644 --- a/crates/jrsonnet-evaluator/src/function/mod.rs +++ b/crates/jrsonnet-evaluator/src/function/mod.rs @@ -225,7 +225,7 @@ impl FuncVal { #[cfg(feature = "exp-destruct")] _ => return false, }; - desc.body.expr() == &Expr::Var(id.clone()) + desc.body.expr() == &Expr::Var(id.0.clone()) } _ => false, } diff --git a/crates/jrsonnet-evaluator/src/gc.rs b/crates/jrsonnet-evaluator/src/gc.rs index 9f949087..bc1dffe2 100644 --- a/crates/jrsonnet-evaluator/src/gc.rs +++ b/crates/jrsonnet-evaluator/src/gc.rs @@ -8,7 +8,7 @@ use std::{ use hashbrown::HashMap; use jrsonnet_gcmodule::{Trace, Tracer}; -use rustc_hash::{FxHashSet, FxHasher}; +use rustc_hash::{FxBuildHasher, FxHasher}; /// Replacement for box, which assumes that the underlying type is [`Trace`] /// Used in places, where `Cc` should be used instead, but it can't, because `CoerceUnsiced` is not stable @@ -76,16 +76,13 @@ impl AsMut for TraceBox { } #[derive(Clone)] -pub struct GcHashSet(pub FxHashSet); +pub struct GcHashSet(pub HashSet); impl GcHashSet { pub fn new() -> Self { Self(HashSet::default()) } pub fn with_capacity(capacity: usize) -> Self { - Self(FxHashSet::with_capacity_and_hasher( - capacity, - BuildHasherDefault::default(), - )) + Self(HashSet::with_capacity_and_hasher(capacity, FxBuildHasher)) } } impl Trace for GcHashSet @@ -99,7 +96,7 @@ where } } impl Deref for GcHashSet { - type Target = FxHashSet; + type Target = HashSet; fn deref(&self) -> &Self::Target { &self.0 @@ -117,16 +114,13 @@ impl Default for GcHashSet { } #[derive(Debug)] -pub struct GcHashMap(pub HashMap>); +pub struct GcHashMap(pub HashMap); impl GcHashMap { pub fn new() -> Self { Self(HashMap::default()) } pub fn with_capacity(capacity: usize) -> Self { - Self(HashMap::with_capacity_and_hasher( - capacity, - BuildHasherDefault::default(), - )) + Self(HashMap::with_capacity_and_hasher(capacity, FxBuildHasher)) } 
} impl Trace for GcHashMap @@ -142,7 +136,7 @@ where } } impl Deref for GcHashMap { - type Target = HashMap>; + type Target = HashMap; fn deref(&self) -> &Self::Target { &self.0 diff --git a/crates/jrsonnet-evaluator/src/lib.rs b/crates/jrsonnet-evaluator/src/lib.rs index a543c93a..7bbe89fd 100644 --- a/crates/jrsonnet-evaluator/src/lib.rs +++ b/crates/jrsonnet-evaluator/src/lib.rs @@ -25,6 +25,7 @@ pub mod trace; pub mod typed; pub mod val; +mod analyze; use std::{ any::Any, cell::{RefCell, RefMut}, @@ -32,6 +33,7 @@ use std::{ path::Path, }; +pub use analyze::analyze_root; pub use ctx::*; pub use dynamic::*; pub use error::{Error, ErrorKind::*, Result, ResultExt}; @@ -100,8 +102,7 @@ pub trait ContextInitializer: Trace { self.populate(for_file, &mut builder); builder.build() } - /// For composability: extend builder. May panic if this initialization is not supported, - /// and the context may only be created via `initialize`. + /// For composability: extend builder. fn populate(&self, for_file: Source, builder: &mut ContextBuilder); /// Allows upcasting from abstract to concrete context initializer. /// jrsonnet by itself doesn't use this method, it is allowed for it to panic. diff --git a/crates/jrsonnet-macros/src/lib.rs b/crates/jrsonnet-macros/src/lib.rs index c154e7fa..ac637ad6 100644 --- a/crates/jrsonnet-macros/src/lib.rs +++ b/crates/jrsonnet-macros/src/lib.rs @@ -9,8 +9,8 @@ use syn::{ punctuated::Punctuated, spanned::Spanned, token::{self, Comma}, - Attribute, DeriveInput, Error, Expr, FnArg, GenericArgument, Ident, ItemFn, LitStr, Pat, Path, - PathArguments, Result, ReturnType, Token, Type, + Attribute, Data, DeriveInput, Error, Expr, FnArg, GenericArgument, Ident, ItemFn, LitStr, Pat, + Path, PathArguments, Result, ReturnType, Token, Type, Visibility, }; fn parse_attr(attrs: &[Attribute], ident: I) -> Result> @@ -78,6 +78,7 @@ fn extract_type_from_option(ty: &Type) -> Result> { struct Field { attrs: Vec, + vis: Visibility, name: Ident, _colon: Token![:], ty: Type, @@ -86,6 +87,7 @@ impl Parse for Field { fn parse(input: ParseStream) -> syn::Result { Ok(Self { attrs: input.call(Attribute::parse_outer)?, + vis: input.parse()?, name: input.parse()?, _colon: input.parse()?, ty: input.parse()?, @@ -799,3 +801,87 @@ pub fn format_istr(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let input = parse_macro_input!(input as FormatInput); input.expand().into() } + +#[proc_macro_derive(AssociatedData, attributes(associated))] +pub fn associated_data(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as DeriveInput); + + match associated_data_inner(input) { + Ok(v) => v.into(), + Err(e) => e.into_compile_error().into(), + } +} + +#[derive(Default)] +struct AssociatedValues { + values: Vec, +} +impl Parse for AssociatedValues { + fn parse(input: ParseStream) -> Result { + if input.is_empty() { + return Ok(Self::default()); + } + let p = Punctuated::::parse_terminated(&input)?; + Ok(Self { + values: p.into_iter().collect(), + }) + } +} + +fn associated_data_inner(input: DeriveInput) -> Result { + let attr: BuiltinAttrs = parse_attr(&input.attrs, "associated")? 
+		.ok_or_else(|| Error::new(input.ident.span(), "missing #[associated] attribute"))?;
+	let field_count = attr.fields.len();
+
+	let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
+	let id = &input.ident;
+
+	let Data::Enum(en) = input.data else {
+		return Err(Error::new(
+			input.span(),
+			"only enums can have associated data",
+		));
+	};
+
+	let mut var_with_tys = vec![];
+	for ele in en.variants {
+		let attr: AssociatedValues = parse_attr(&ele.attrs, "associated")?.unwrap_or_default();
+		if attr.values.len() != field_count {
+			return Err(Error::new(ele.span(), "mismatched number of values"));
+		}
+		// TODO: Ensure no unknown values
+		// TODO: fill undefined values with Default::default, or report an error.
+		var_with_tys.push((ele, attr));
+	}
+
+	let items = attr.fields.into_iter().enumerate().map(|(fid, field)| {
+		let attrs = &field.attrs;
+		let id = &field.name;
+		let ty = &field.ty;
+		let vis = &field.vis;
+		let vars = var_with_tys.iter().map(|(var, ty)| {
+			let vid = &var.ident;
+			// TODO: Allow referencing enum fields
+			// let vals = &var.fields;
+
+			let value = &ty.values[fid];
+			quote! {
+				Self::#vid {..} => #value
+			}
+		});
+		quote! {
+			#(#attrs)*
+			#vis fn #id(&self) -> #ty {
+				match self {
+					#(#vars,)*
+				}
+			}
+		}
+	});
+
+	Ok(quote! {
+		impl #impl_generics #id #ty_generics #where_clause {
+			#(#items)*
+		}
+	})
+}
diff --git a/crates/jrsonnet-parser/Cargo.toml b/crates/jrsonnet-parser/Cargo.toml
index 4bdea6da..1ed8a9b7 100644
--- a/crates/jrsonnet-parser/Cargo.toml
+++ b/crates/jrsonnet-parser/Cargo.toml
@@ -18,4 +18,6 @@ jrsonnet-gcmodule.workspace = true
static_assertions.workspace = true

-peg.workspace = true
+nom.workspace = true
+jrsonnet-tokenizer = { version = "0.5.0-pre96", path = "../jrsonnet-tokenizer" }
+jrsonnet-macros.workspace = true
diff --git a/crates/jrsonnet-parser/README.adoc b/crates/jrsonnet-parser/README.adoc
index 71bde78d..1c694855 100644
--- a/crates/jrsonnet-parser/README.adoc
+++ b/crates/jrsonnet-parser/README.adoc
@@ -1,3 +1,5 @@
= jrsonnet-parser

-Parser for jsonnet language
+Nom-based (previously peg-based) parser for the jsonnet language, plus jrsonnet's AST definitions.
+
+For the CST parser, see `jrsonnet-rowan-parser`.
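The `AssociatedData` derive above is easiest to review next to a concrete expansion. Below is a hand-written sketch of roughly what `associated_data_inner` would emit for the `UnaryOpType` enum that adopts it later in this diff; it is illustrative only (the real output comes from the `quote!` template above), but it shows the shape: one accessor per declared field, with one match arm per variant.

```rust
// Approximate expansion of #[derive(AssociatedData)] with
// #[associated(fields(pub name: &'static str, pub binding_power: ((), u8)))]
// on UnaryOpType; a sketch, not the literal macro output.
impl UnaryOpType {
	pub fn name(&self) -> &'static str {
		match self {
			Self::Plus { .. } => "+",
			Self::Minus { .. } => "-",
			Self::BitNot { .. } => "~",
			Self::Not { .. } => "!",
		}
	}
	pub fn binding_power(&self) -> ((), u8) {
		match self {
			Self::Plus { .. } => ((), 20),
			Self::Minus { .. } => ((), 20),
			Self::BitNot { .. } => ((), 20),
			Self::Not { .. } => ((), 20),
		}
	}
}
```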
diff --git a/crates/jrsonnet-parser/src/expr.rs b/crates/jrsonnet-parser/src/expr.rs index d11ed119..baabd4b0 100644 --- a/crates/jrsonnet-parser/src/expr.rs +++ b/crates/jrsonnet-parser/src/expr.rs @@ -1,11 +1,13 @@ use std::{ + cell::RefCell, fmt::{self, Debug, Display}, - ops::Deref, + ops::{Deref, RangeInclusive}, rc::Rc, }; use jrsonnet_gcmodule::Trace; use jrsonnet_interner::IStr; +use jrsonnet_macros::AssociatedData; use crate::source::Source; @@ -53,95 +55,88 @@ pub enum Member { AssertStmt(AssertStmt), } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Trace)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Trace, AssociatedData)] +#[associated(fields(pub name: &'static str, pub binding_power: ((), u8)))] pub enum UnaryOpType { + #[associated("+", ((), 20))] Plus, + #[associated("-", ((), 20))] Minus, + #[associated("~", ((), 20))] BitNot, + #[associated("!", ((), 20))] Not, } impl Display for UnaryOpType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use UnaryOpType::*; - write!( - f, - "{}", - match self { - Plus => "+", - Minus => "-", - BitNot => "~", - Not => "!", - } - ) + write!(f, "{}", self.name(),) } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Trace)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Trace, AssociatedData)] +#[associated(fields(pub name: &'static str, pub binding_power: (u8, u8)))] pub enum BinaryOpType { + // Fake: inserted when `ident {objinside}` is detected, does not have an actual operator, + // but works as + + #[associated("", (20, 21))] + ObjectApply, + + #[associated("*", (18, 19))] Mul, + #[associated("/", (18, 19))] Div, - - /// Implemented as intrinsic, put here for completeness + #[associated("%", (18, 19))] Mod, + #[associated("+", (16, 17))] Add, + #[associated("-", (16, 17))] Sub, + #[associated("<<", (14, 15))] Lhs, + #[associated(">>", (14, 15))] Rhs, + #[associated("<", (12, 13))] Lt, + #[associated(">", (12, 13))] Gt, + #[associated("<=", (12, 13))] Lte, + #[associated(">=", (12, 13))] Gte, + #[associated("in", (12, 13))] + In, + #[associated("==", (10, 11))] + Eq, + #[associated("!=", (10, 11))] + Neq, + + #[associated("&", (8, 9))] BitAnd, - BitOr, + + #[associated("^", (6, 7))] BitXor, - Eq, - Neq, + #[associated("|", (4, 5))] + BitOr, + #[associated("&&", (2, 3))] And, + + #[associated("||", (0, 1))] Or, #[cfg(feature = "exp-null-coaelse")] + #[associated("??", (0, 1))] NullCoaelse, - - // Equialent to std.objectHasEx(a, b, true) - In, } impl Display for BinaryOpType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use BinaryOpType::*; - write!( - f, - "{}", - match self { - Mul => "*", - Div => "/", - Mod => "%", - Add => "+", - Sub => "-", - Lhs => "<<", - Rhs => ">>", - Lt => "<", - Gt => ">", - Lte => "<=", - Gte => ">=", - BitAnd => "&", - BitOr => "|", - BitXor => "^", - Eq => "==", - Neq => "!=", - And => "&&", - Or => "||", - In => "in", - #[cfg(feature = "exp-null-coaelse")] - NullCoaelse => "??", - } - ) + write!(f, "{}", self.name()) } } @@ -181,7 +176,7 @@ pub enum DestructRest { #[derive(Debug, Clone, PartialEq, Trace)] pub enum Destruct { - Full(IStr), + Full(Spanned), #[cfg(feature = "exp-destruct")] Skip, #[cfg(feature = "exp-destruct")] @@ -200,7 +195,7 @@ impl Destruct { /// Name of destructure, used for function parameter names pub fn name(&self) -> Option { match self { - Self::Full(name) => Some(name.clone()), + Self::Full(name) => Some(name.0.clone()), #[cfg(feature = "exp-destruct")] _ => None, } @@ -247,7 +242,8 @@ pub enum BindSpec { value: LocExpr, }, Function { - name: IStr, + // 
Always Destruct::Full + name: Destruct, params: ParamsDesc, value: LocExpr, }, @@ -333,11 +329,6 @@ pub enum Expr { /// Object: {a: 2} Obj(ObjBody), - /// Object extension: var1 {b: 2} - ObjExtend(LocExpr, ObjBody), - - /// (obj) - Parened(LocExpr), /// -2 UnaryOp(UnaryOpType, LocExpr), @@ -386,10 +377,33 @@ pub struct IndexPart { #[trace(skip)] #[repr(C)] pub struct Span(pub Source, pub u32, pub u32); + +thread_local! { + static CURRENT_SOURCE: RefCell> = const { RefCell::new(None) }; +} +// Only available during parsing +pub(crate) fn current_source() -> Source { + CURRENT_SOURCE + .with_borrow(|v| v.clone()) + .expect("no parsing happening right now!") +} +pub(crate) fn with_current_source(current: Source, v: impl FnOnce() -> T) -> T { + CURRENT_SOURCE.set(Some(current)); + let result = v(); + // TODO: Handle panics? + CURRENT_SOURCE.set(None); + result +} impl Span { pub fn belongs_to(&self, other: &Span) -> bool { other.0 == self.0 && other.1 <= self.1 && other.2 >= self.2 } + pub fn range(&self) -> RangeInclusive { + self.1 as usize..=self.2.saturating_sub(1).max(self.1) as usize + } + pub(crate) fn dummy() -> Self { + Self(current_source(), 0, 0) + } } static_assertions::assert_eq_size!(Span, (usize, usize)); @@ -400,6 +414,26 @@ impl Debug for Span { } } +#[derive(Clone, PartialEq, Trace)] +pub struct Spanned(pub T, pub Span); +impl Spanned { + pub(crate) fn dummy(t: T) -> Self { + Self(t, Span::dummy()) + } +} +impl Debug for Spanned { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let expr = &self.0; + if f.alternate() { + write!(f, "{:#?}", expr)?; + } else { + write!(f, "{:?}", expr)?; + } + write!(f, " from {:?}", self.1)?; + Ok(()) + } +} + /// Holds AST expression and its location in source file #[derive(Clone, PartialEq, Trace)] pub struct LocExpr(Rc<(Expr, Span)>); diff --git a/crates/jrsonnet-parser/src/lib.rs b/crates/jrsonnet-parser/src/lib.rs index ce419d57..2a35f408 100644 --- a/crates/jrsonnet-parser/src/lib.rs +++ b/crates/jrsonnet-parser/src/lib.rs @@ -1,371 +1,1280 @@ #![allow(clippy::redundant_closure_call, clippy::derive_partial_eq_without_eq)] -use std::rc::Rc; +use std::{ + char, + collections::HashSet, + fmt, + marker::PhantomData, + ops::{Bound, RangeBounds, RangeFrom, RangeTo}, + rc::Rc, + slice::SliceIndex, + str::{CharIndices, Chars}, +}; -use peg::parser; +use jrsonnet_gcmodule::Trace; +use nom::{ + branch::alt, + bytes::complete::{is_a, is_not, tag, tag_no_case, take_until}, + character::complete::{alpha1, char, digit1, one_of}, + combinator::{cut, iterator, map, map_res, not, opt, peek, recognize, value}, + error::{context, ErrorKind}, + multi::{ + fold_many0, fold_many1, many0, many0_count, many1, many1_count, many_till, separated_list1, + }, + sequence::{delimited, preceded, separated_pair, terminated, tuple}, + AsBytes, Compare, FindSubstring, IResult, InputIter, InputLength, InputTake, + InputTakeAtPosition, Needed, Offset, Parser, Slice, +}; mod expr; pub use expr::*; pub use jrsonnet_interner::IStr; -pub use peg; mod location; mod source; -mod unescape; pub use location::CodeLocation; pub use source::{ Source, SourceDirectory, SourceFifo, SourceFile, SourcePath, SourcePathT, SourceVirtual, }; +use static_assertions::assert_eq_size; pub struct ParserSettings { pub source: Source, } -macro_rules! 
expr_bin {
-	($a:ident $op:ident $b:ident) => {
-		Expr::BinaryOp($a, $op, $b)
-	};
-}
+#[derive(Clone, Copy)]
+#[repr(packed)]
+pub struct Input<'i> {
+	// Input length is already limited to 4GB (hence the u32 offsets), yet &str carries the slice length around (usize);
+	// we replace this metadata with u32 start/end markers (maybe this should be start/len?)
+	input: *const u8,
+	start: u32,
+	end: u32,
+	_marker: PhantomData<&'i str>,
+}
-macro_rules! expr_un {
-	($op:ident $a:ident) => {
-		Expr::UnaryOp($op, $a)
-	};
-}
+
+impl<'i> fmt::Debug for Input<'i> {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		self.as_str().fmt(f)
+	}
+}
+type Output<'i, O> = IResult<Input<'i>, O>;
+impl<'i> Input<'i> {
+	fn len(&self) -> usize {
+		self.as_bytes().len()
+	}
+	fn is_empty(&self) -> bool {
+		self.start == self.end
+	}
+	fn new(str: &str) -> Self {
+		Self {
+			input: str.as_ptr(),
+			start: 0,
+			// I don't think it makes sense to propagate this error.
+			// TODO: How do other jsonnet implementations handle such errors?
+			end: str
+				.len()
+				.try_into()
+				.expect("parser input is limited by 4gb files"),
+			_marker: PhantomData,
+		}
+	}
+	fn _new_pos(str: &str, start: u32) -> Self {
+		Self {
+			input: str.as_ptr(),
+			start,
-parser! {
-	grammar jsonnet_parser() for str {
-		use peg::ParseLiteral;
-
-		rule eof() = quiet!{![_]} / expected!("")
-		rule eol() = "\n" / eof()
-
-		/// Standard C-like comments
-		rule comment()
-			= "//" (!eol()[_])* eol()
-			/ "/*" ("\\*/" / "\\\\" / (!("*/")[_]))* "*/"
-			/ "#" (!eol()[_])* eol()
-
-		rule single_whitespace() = quiet!{([' ' | '\r' | '\n' | '\t'] / comment())} / expected!("")
-		rule _() = quiet!{([' ' | '\r' | '\n' | '\t']+) / comment()}* / expected!("")
-
-		/// For comma-delimited elements
-		rule comma() = quiet!{_ "," _} / expected!("")
-		rule alpha() -> char = c:$(['_' | 'a'..='z' | 'A'..='Z']) {c.chars().next().unwrap()}
-		rule digit() -> char = d:$(['0'..='9']) {d.chars().next().unwrap()}
-		rule end_of_ident() = !['0'..='9' | '_' | 'a'..='z' | 'A'..='Z']
-		/// Sequence of digits
-		rule uint_str() -> &'input str = a:$(digit()+) { a }
-		/// Number in scientific notation format
-		rule number() -> f64 = quiet!{a:$(uint_str() ("." uint_str())? (['e'|'E'] (s:['+'|'-'])? uint_str())?) {? a.parse().map_err(|_| "") }} / expected!("")
-
-		/// Reserved word followed by any non-alphanumberic
-		rule reserved() = ("assert" / "else" / "error" / "false" / "for" / "function" / "if" / "import" / "importstr" / "importbin" / "in" / "local" / "null" / "tailstrict" / "then" / "self" / "super" / "true") end_of_ident()
-		rule id() -> IStr = v:$(quiet!{ !reserved() alpha() (alpha() / digit())*} / expected!("")) { v.into() }
-
-		rule keyword(id: &'static str) -> ()
-			= ##parse_string_literal(id) end_of_ident()
-
-		pub rule param(s: &ParserSettings) -> expr::Param = name:destruct(s) expr:(_ "=" _ expr:expr(s){expr})? { expr::Param(name, expr) }
-		pub rule params(s: &ParserSettings) -> expr::ParamsDesc
-			= params:param(s) ** comma() comma()? { expr::ParamsDesc(Rc::new(params)) }
-			/ { expr::ParamsDesc(Rc::new(Vec::new())) }
-
-		pub rule arg(s: &ParserSettings) -> (Option<IStr>, LocExpr)
-			= name:(quiet! { (s:id() _ "=" !['='] _ {s})? } / expected!("")) expr:expr(s) {(name, expr)}
-
-		pub rule args(s: &ParserSettings) -> expr::ArgsDesc
-			= args:arg(s)**comma() comma()? {?
- let unnamed_count = args.iter().take_while(|(n, _)| n.is_none()).count(); - let mut unnamed = Vec::with_capacity(unnamed_count); - let mut named = Vec::with_capacity(args.len() - unnamed_count); - let mut named_started = false; - for (name, value) in args { - if let Some(name) = name { - named_started = true; - named.push((name, value)); - } else { - if named_started { - return Err("") - } - unnamed.push(value); + // This method is not part of public api, and only used by this file, no risk of 4GB overflow. + end: str.len() as u32 + start, + _marker: PhantomData, + } + } + fn as_bytes(&self) -> &[u8] { + // Safety: String was constructed/sliced the same way + unsafe { std::slice::from_raw_parts(self.input, (self.end - self.start) as usize) } + } + fn to_bytes(self) -> &'i [u8] { + // Safety: String was constructed/sliced the same way + unsafe { std::slice::from_raw_parts(self.input, (self.end - self.start) as usize) } + } + fn as_str(&'i self) -> &'i str { + // Safety: This struct is instantiated from &str, and slicing checks utf-8 correctness. + unsafe { std::str::from_utf8_unchecked(self.as_bytes()) } + } + fn to_str(self) -> &'i str { + // Safety: This struct is instantiated from &str, and slicing checks utf-8 correctness. + unsafe { std::str::from_utf8_unchecked(self.to_bytes()) } + } + fn get<T>(&self, range: T) -> Self + where + T: RangeBounds<usize>, + T: SliceIndex<str, Output = str>, + { + let start = match range.start_bound() { + Bound::Included(v) => *v, + Bound::Excluded(_) => unreachable!(), + Bound::Unbounded => 0, + }; + Self::_new_pos( + self.as_str().get(range).expect("incorrect slice range"), + start as u32 + self.start, + ) + } + unsafe fn get_unchecked<T>(&self, range: T) -> Self + where + T: RangeBounds<usize>, + T: SliceIndex<str, Output = str>, + { + let start = match range.start_bound() { + Bound::Included(v) => *v, + Bound::Excluded(_) => unreachable!(), + Bound::Unbounded => 0, + }; + Self::_new_pos( + self.as_str().get_unchecked(range), + start as u32 + self.start, + ) + } +} +impl AsBytes for Input<'_> { + fn as_bytes(&self) -> &[u8] { + self.as_bytes() + } +} +impl InputLength for Input<'_> { + fn input_len(&self) -> usize { + self.as_bytes().len() + } +} +impl InputTake for Input<'_> { + fn take(&self, count: usize) -> Self { + self.get(..count) + } + + fn take_split(&self, count: usize) -> (Self, Self) { + (self.get(count..), self.get(..count)) + } +} +impl Compare<&str> for Input<'_> { + fn compare(&self, t: &str) -> nom::CompareResult { + self.as_str().compare(t) + } + + fn compare_no_case(&self, t: &str) -> nom::CompareResult { + self.as_str().compare_no_case(t) + } +} + +impl FindSubstring<&str> for Input<'_> { + fn find_substring(&self, substr: &str) -> Option<usize> { + self.as_str().find_substring(substr) + } +} +impl<'i> InputIter for Input<'i> { + type Item = char; + + type Iter = CharIndices<'i>; + + type IterElem = Chars<'i>; + + fn iter_indices(&self) -> Self::Iter { + self.to_str().char_indices() + } + + fn iter_elements(&self) -> Self::IterElem { + self.to_str().chars() + } + + fn position<P>
(&self, predicate: P) -> Option<usize> + where + P: Fn(Self::Item) -> bool, + { + todo!() + } + + fn slice_index(&self, count: usize) -> Result<usize, Needed> { + todo!() + } +} +impl Slice<RangeFrom<usize>> for Input<'_> { + fn slice(&self, range: RangeFrom<usize>) -> Self { + self.get(range) + } +} +impl Slice<RangeTo<usize>> for Input<'_> { + fn slice(&self, range: RangeTo<usize>) -> Self { + self.get(range) + } +} +impl Offset for Input<'_> { + fn offset(&self, second: &Self) -> usize { + (second.start - self.start) as usize + } +} +impl InputTakeAtPosition for Input<'_> { + type Item = char; + + fn split_at_position<P, E: nom::error::ParseError<Self>>( + &self, + predicate: P, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.as_str().find(predicate) { + // find() returns a byte index that is already in the slice at a char boundary + Some(i) => unsafe { Ok((self.get_unchecked(i..), self.get_unchecked(..i))) }, + None => Err(nom::Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position1<P, E: nom::error::ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.as_str().find(predicate) { + Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))), + Some(i) => unsafe { Ok((self.get_unchecked(i..), self.get_unchecked(..i))) }, + None => Err(nom::Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position_complete<P, E: nom::error::ParseError<Self>>( + &self, + predicate: P, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.as_str().find(predicate) { + // find() returns a byte index that is already in the slice at a char boundary + Some(i) => unsafe { Ok((self.get_unchecked(i..), self.get_unchecked(..i))) }, + // the end of slice is a char boundary + None => unsafe { + Ok(( + self.get_unchecked(self.len()..), + self.get_unchecked(..self.len()), + )) + }, + } + } + + fn split_at_position1_complete<P, E: nom::error::ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.as_str().find(predicate) { + Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))), + // find() returns a byte index that is already in the slice at a char boundary + Some(i) => unsafe { Ok((self.get_unchecked(i..), self.get_unchecked(..i))) }, + None => { + if self.is_empty() { + Err(nom::Err::Error(E::from_error_kind(*self, e))) + } else { + // the end of slice is a char boundary + unsafe { + Ok(( + self.get_unchecked(self.len()..), + self.get_unchecked(..self.len()), + )) } } - Ok(expr::ArgsDesc::new(unnamed, named)) } + } + } +} + +#[cfg(target_pointer_width = "64")] +assert_eq_size!(Input<'_>, (usize, usize)); - pub rule destruct_rest() -> expr::DestructRest = "..." into:(_ into:id() {into})? {if let Some(into) = into { expr::DestructRest::Keep(into) } else {expr::DestructRest::Drop}} - pub rule destruct_array(s: &ParserSettings) -> expr::Destruct = "[" _ start:destruct(s)**comma() rest:( comma() _ rest:destruct_rest()? end:( comma() end:destruct(s)**comma() (_ comma())? {end} / comma()? {Vec::new()} ) {(rest, end)} / comma()? {(None, Vec::new())} ) _ "]" {?
- #[cfg(feature = "exp-destruct")] return Ok(expr::Destruct::Array { - start, - rest: rest.0, - end: rest.1, - }); - #[cfg(not(feature = "exp-destruct"))] Err("!!!experimental destructuring was not enabled") +fn ignore<I, O, E, F>(parser: F) -> impl FnMut(I) -> IResult<I, (), E> +where + F: Parser<I, O, E>, +{ + map(parser, |_| ()) +} +fn comment(input: Input<'_>) -> Output<()> { + // peg-based parser supported escaping the closing */, but I have no idea why I thought it was possible + let multiline = delimited(tag("/*"), take_until("*/"), tag("*/")); + let singleline_hash = preceded(tag("#"), is_not("\n")); + let singleline_slash = preceded(tag("//"), is_not("\n")); + ignore(alt((multiline, singleline_hash, singleline_slash)))(input) +} +fn ws_single(input: Input<'_>) -> Output<()> { + let ws = ignore(is_a(" \n\r\t")); + alt((ws, comment))(input) +} +fn ws_mandatory(input: Input<'_>) -> Output<()> { + ignore(many1_count(ws_single))(input) +} +fn ws(input: Input<'_>) -> Output<()> { + ignore(many0_count(ws_single))(input) +} +fn in_ws<'i, O>( + rule: impl FnMut(Input<'i>) -> Output<'i, O>, +) -> impl FnMut(Input<'i>) -> Output<'i, O> { + delimited(ws, rule, ws) +} +fn n_ws<'i, O: fmt::Debug>( + rule: impl FnMut(Input<'i>) -> Output<'i, O>, +) -> impl FnMut(Input<'i>) -> Output<'i, O> { + terminated(rule, ws) +} +fn comma(input: Input<'_>) -> Output<()> { + ignore(in_ws(char(',')))(input) +} +fn equal_sign(input: Input<'_>) -> Output<()> { + ignore(in_ws(char('=')))(input) +} +fn plus_minus(input: Input<'_>) -> Output<()> { + ignore(opt(one_of("+-")))(input) +} +fn number(input: Input<'_>) -> Output<f64> { + let fract = opt(preceded(char('.'), decimal)); + let scient = opt(preceded(tag_no_case("e"), tuple((plus_minus, decimal)))); + map_res( + recognize(tuple((plus_minus, decimal, fract, scient))), + |s| s.as_str().replace('_', "").parse(), + )(input) +} +/// Like `digit1`, but allows `_` in the middle of the number +fn decimal(input: Input) -> Output<()> { + ignore(separated_list1( + char('_'), + // separated_list1 collects values into a vec. To avoid allocations here, replacing value with ZST, + // so vec works just as a counter + ignore(digit1), + ))(input) +} +fn id(input: Input<'_>) -> Output<IStr> { + let start = many1_count(alt((ignore(alpha1), ignore(char('_'))))); + let rest = many0_count(alt((ignore(alpha1), ignore(digit1), ignore(char('_'))))); + map_res(recognize(tuple((start, rest))), |v: Input<'_>| { + let ident = IStr::from(v.as_str()); + if RESERVED.with(|r| r.contains(&ident)) { + return Err(ErrorKind::Tag); + } + Ok(ident) + })(input) +} +thread_local!
{ + static RESERVED: HashSet<IStr> = [ + "assert", + "else", + "error", + "false", + "for", + "function", + "if", + "import", + "importstr", + "importbin", + "in", + "local", + "null", + "tailstrict", + "then", + "self", + "super", + "true", + ].into_iter().map(IStr::from).collect(); +} +fn keyword<'i, 'p: 'i>(kw: &'i str) -> impl FnMut(Input<'p>) -> Output<'p, ()> + 'i { + ignore(terminated( + tag(kw), + terminated( + not(alt((ignore(digit1), ignore(alpha1), ignore(char('_'))))), + ws, + ), + )) +} + +fn destruct(input: Input<'_>) -> Output<Destruct> { + let full = map(spanned_any(id), Destruct::Full); + #[cfg(feature = "exp-destruct")] + let rest = map(preceded(tag("..."), opt(id)), |v| { + Destruct::Rest(v.map_or(DestructRest::Drop, DestructRest::Keep)) + }); + #[cfg(feature = "exp-destruct")] + let array = map( + delimited( + char('['), + tuple(( + // Start + separated_trailing0(comma, alt((destruct, rest))), + )), + char(']'), + ), + |v| todo!(), + ); + // TODO + // let object = map(delimited(char('{'), , char('}')), f) + #[cfg(feature = "exp-destruct")] + let skip = map(char('?'), |_| Destruct::Skip); + + alt(( + full, + #[cfg(feature = "exp-destruct")] + skip, + #[cfg(feature = "exp-destruct")] + rest, + #[cfg(feature = "exp-destruct")] + array, + #[cfg(feature = "exp-destruct")] + object, + ))(input) +} + +fn expr(input: Input<'_>) -> Output<LocExpr> { + map(expr_binding_power(0), |v| v.0)(input) +} +fn param(input: Input<'_>) -> Output<Param> { + map( + tuple((destruct, opt(preceded(equal_sign, expr)))), + |(key, value)| Param(key, value), + )(input) +} +fn params(input: Input<'_>) -> Output<ParamsDesc> { + let inner = |input| { + map(separated_trailing0(comma, param), |params| { + ParamsDesc(Rc::new(params)) + })(input) + }; + + delimited(char('('), cut(inner), cut(char(')')))(input) +} +fn arg(input: Input) -> Output<(Option<IStr>, LocExpr)> { + alt(( + map(expr, |v| (None, v)), + map(separated_pair(id, equal_sign, expr), |(k, v)| (Some(k), v)), + ))(input) +} +fn args(input: Input<'_>) -> Output<ArgsDesc> { + let inner = |input| { + map_res(separated_trailing0(comma, arg), |args| { + let unnamed_count = args.iter().take_while(|(n, _)| n.is_none()).count(); + let mut unnamed = Vec::with_capacity(unnamed_count); + let mut named = Vec::with_capacity(args.len() - unnamed_count); + let mut named_started = false; + for (name, value) in args { + if let Some(name) = name { + named_started = true; + named.push((name, value)); + } else { + if named_started { + return Err("unexpected unnamed argument after named"); + } + unnamed.push(value); + } } - pub rule destruct_object(s: &ParserSettings) -> expr::Destruct = "{" _ fields:(name:id() into:(_ ":" _ into:destruct(s) {into})? default:(_ "=" _ v:expr(s) {v})? {(name, into, default)})**comma() rest:( comma() rest:destruct_rest()? {rest} / comma()? {None} ) _ "}" {?
- #[cfg(feature = "exp-destruct")] return Ok(expr::Destruct::Object { - fields, - rest, - }); - #[cfg(not(feature = "exp-destruct"))] Err("!!!experimental destructuring was not enabled") + Ok(expr::ArgsDesc::new(unnamed, named)) + })(input) + }; + delimited(char('('), context("arguments", cut(inner)), cut(char(')')))(input) +} +fn separated_trailing0<'i, O, O2>( + with: impl FnMut(Input<'i>) -> Output<'i, O2> + Copy, + del_value: impl FnMut(Input<'i>) -> Output<'i, O>, +) -> impl FnMut(Input<'i>) -> Output<'i, Vec<O>> { + map( + opt(terminated( + separated_list1(with, del_value), + tuple((ws, opt(with))), + )), + |v| v.unwrap_or_default(), + ) +} +fn bind(input: Input<'_>) -> Output<BindSpec> { + map( + tuple(( + destruct, + in_ws(opt(params)), + preceded(tuple((char('='), ws)), expr), + )), + |(into, params, value)| match params { + None => BindSpec::Field { into, value }, + Some(params) => BindSpec::Function { + name: into, + params, + value, + }, + }, + )(input) +} +fn assertion(input: Input<'_>) -> Output<AssertStmt> { + let (input, _) = keyword("assert")(input)?; + cut(map( + tuple((expr, opt(preceded(char(':'), expr)))), + |(a, b)| AssertStmt(a, b), + ))(input) +} +fn string_block<'i>(input: Input<'i>) -> Output<'i, IStr> { + let inner = |input: Input<'i>| -> Output<'i, IStr> { + let (input, _header) = tuple(( + // At least one newline is from the header: + // |||\t\t\t\n + // ^^^ + // ^^^^^^ - optional ws + // ^^ first NL, but there might be many ignored. + many_till(ignore(is_a(" \r\t")), char('\n')), + ))(input)?; + let (input, newlines) = many0_count(char('\n'))(input)?; + let (input, prefix) = is_a("\t ")(input)?; + + let mut whole_line = recognize(tuple((is_not("\n"), char('\n')))); + + let (input, first_line) = whole_line(input)?; + + let (input, rest_lines) = many0(alt(( + value("\n", char('\n')), + map(preceded(tag(prefix.to_str()), whole_line), |v| v.to_str()), + )))(input)?; + + let (input, _final) = tuple((opt(is_a("\t ")), tag("|||")))(input)?; + + let mut out = String::with_capacity( + newlines + first_line.len() + rest_lines.iter().copied().map(str::len).sum::<usize>(), + ); + for _ in 0..newlines { + out.push('\n'); + } + out.push_str(first_line.as_str()); + out.extend(rest_lines); + + Ok((input, out.into())) + }; + + let (input, _prefix) = tag("|||")(input)?; + + cut(inner)(input) +} + +fn hex_char(input: Input<'_>) -> Output<u8> { + map(one_of("0123456789abcdefABCDEF"), |c| match c { + '0'..='9' => c as u8 - b'0', + 'a'..='f' => c as u8 - b'a' + 10, + 'A'..='F' => c as u8 - b'A' + 10, + _ => unreachable!(), + })(input) +} +fn hex_byte(input: Input<'_>) -> Output<u8> { + map(tuple((hex_char, hex_char)), |(a, b)| (a << 4) | b)(input) +} +fn unicode_char(input: Input<'_>) -> Output<char> { + let prefix = tag("\\u"); + + let cont = |input| { + // Tag is not Copy + let prefix = tag("\\u"); + + let mut hex_unicode_surrogate = map(tuple((hex_byte, hex_byte)), |(a, b)| { + ((a as u16) << 8) | b as u16 + }); + + let (input, first) = hex_unicode_surrogate(input)?; + let first = match first { + 0xdc00..=0xdfff => { + // FIXME: Only valid as second part of surrogate pair + return Err(nom::Err::Error(nom::error::make_error( + input, + ErrorKind::IsA, + ))); } - pub rule destruct(s: &ParserSettings) -> expr::Destruct = v:id() {expr::Destruct::Full(v)} / "?" {?
- #[cfg(feature = "exp-destruct")] return Ok(expr::Destruct::Skip); - #[cfg(not(feature = "exp-destruct"))] Err("!!!experimental destructuring was not enabled") + n @ 0xd800..=0xdbff => (n - 0xd800) as u32, + n => return Ok((input, char::from_u32(n as u32).expect("correct"))), + }; + + let (input, _marker) = prefix(input)?; + + let (input, second) = hex_unicode_surrogate(input)?; + let second = match second { + 0xdc00..=0xdfff => (second - 0xdc00) as u32, + _ => { + // FIXME: Invalid surrogate pair + return Err(nom::Err::Error(nom::error::make_error( + input, + ErrorKind::IsA, + ))); } - / arr:destruct_array(s) {arr} - / obj:destruct_object(s) {obj} - - pub rule bind(s: &ParserSettings) -> expr::BindSpec - = into:destruct(s) _ "=" _ expr:expr(s) {expr::BindSpec::Field{into, value: expr}} - / name:id() _ "(" _ params:params(s) _ ")" _ "=" _ expr:expr(s) {expr::BindSpec::Function{name, params, value: expr}} - - pub rule assertion(s: &ParserSettings) -> expr::AssertStmt - = keyword("assert") _ cond:expr(s) msg:(_ ":" _ e:expr(s) {e})? { expr::AssertStmt(cond, msg) } - - pub rule whole_line() -> &'input str - = str:$((!['\n'][_])* "\n") {str} - pub rule string_block() -> String - = "|||" (!['\n']single_whitespace())* "\n" - empty_lines:$(['\n']*) - prefix:[' ' | '\t']+ first_line:whole_line() - lines:("\n" {"\n"} / [' ' | '\t']*<{prefix.len()}> s:whole_line() {s})* - [' ' | '\t']*<, {prefix.len() - 1}> "|||" - {let mut l = empty_lines.to_owned(); l.push_str(first_line); l.extend(lines); l} - - rule hex_char() - = quiet! { ['0'..='9' | 'a'..='f' | 'A'..='F'] } / expected!("") - - rule string_char(c: rule<()>) - = (!['\\']!c()[_])+ - / "\\\\" - / "\\u" hex_char() hex_char() hex_char() hex_char() - / "\\x" hex_char() hex_char() - / ['\\'] (quiet! { ['b' | 'f' | 'n' | 'r' | 't' | '"' | '\''] } / expected!("")) - pub rule string() -> String - = ['"'] str:$(string_char(<"\"">)*) ['"'] {? unescape::unescape(str).ok_or("")} - / ['\''] str:$(string_char(<"\'">)*) ['\''] {? unescape::unescape(str).ok_or("")} - / quiet!{ "@'" str:$(("''" / (!['\''][_]))*) "'" {str.replace("''", "'")} - / "@\"" str:$(("\"\"" / (!['"'][_]))*) "\"" {str.replace("\"\"", "\"")} - / string_block() } / expected!("") - - pub rule field_name(s: &ParserSettings) -> expr::FieldName - = name:id() {expr::FieldName::Fixed(name)} - / name:string() {expr::FieldName::Fixed(name.into())} - / "[" _ expr:expr(s) _ "]" {expr::FieldName::Dyn(expr)} - pub rule visibility() -> expr::Visibility - = ":::" {expr::Visibility::Unhide} - / "::" {expr::Visibility::Hidden} - / ":" {expr::Visibility::Normal} - pub rule field(s: &ParserSettings) -> expr::FieldMember - = name:field_name(s) _ plus:"+"? _ visibility:visibility() _ value:expr(s) {expr::FieldMember{ - name, - plus: plus.is_some(), - params: None, - visibility, - value, - }} - / name:field_name(s) _ "(" _ params:params(s) _ ")" _ visibility:visibility() _ value:expr(s) {expr::FieldMember{ - name, - plus: false, - params: Some(params), - visibility, - value, - }} - pub rule obj_local(s: &ParserSettings) -> BindSpec - = keyword("local") _ bind:bind(s) {bind} - pub rule member(s: &ParserSettings) -> expr::Member - = bind:obj_local(s) {expr::Member::BindStmt(bind)} - / assertion:assertion(s) {expr::Member::AssertStmt(assertion)} - / field:field(s) {expr::Member::Field(field)} - pub rule objinside(s: &ParserSettings) -> expr::ObjBody - = pre_locals:(b: obj_local(s) comma() {b})* &"[" field:field(s) post_locals:(comma() b:obj_local(s) {b})* _ ("," _)? 
forspec:forspec(s) others:(_ rest:compspec(s) {rest})? { - let mut compspecs = vec![CompSpec::ForSpec(forspec)]; - compspecs.extend(others.unwrap_or_default()); - expr::ObjBody::ObjComp(expr::ObjComp{ + }; + + Ok(( + input, + char::from_u32(((first << 10) | second) + 0x10000).expect("correct"), + )) + }; + + let (input, _marker) = prefix(input)?; + cut(cont)(input) +} +fn string_quoted(input: Input<'_>) -> Output<IStr> { + #[derive(Clone, Copy)] + enum StringPart<'i> { + Raw(&'i str), + Special(char), + } + + let unicode_part = map(unicode_char, StringPart::Special); + let byte_part = map(preceded(tag("\\x"), cut(hex_byte)), |v| { + StringPart::Special(v as char) + }); + let escape_char_part = map( + preceded( + char('\\'), + cut(alt(( + value('\\', char('\\')), + value('\u{0008}', char('b')), + value('\u{000c}', char('f')), + value('\n', char('n')), + value('\r', char('r')), + value('\t', char('t')), + value('"', char('"')), + value('\'', char('\'')), + // TODO: add \x, \u for better suggestions? + ))), + ), + StringPart::Special, + ); + + let inner = |escapeend: &'static str| { + map( + fold_many0( + alt(( + map(is_not(escapeend), |v: Input<'_>| { + StringPart::Raw(v.to_str()) + }), + unicode_part, + byte_part, + escape_char_part, + )), + String::new, + |mut acc, v| { + match v { + StringPart::Raw(s) => acc.push_str(s), + StringPart::Special(c) => acc.push(c), + } + acc + }, + ), + IStr::from, + ) + }; + + let cont = |double_quote: bool| { + terminated( + inner(if double_quote { "\"\\" } else { "'\\" }), + char(if double_quote { '"' } else { '\'' }), + ) + }; + + let (input, double_quote) = alt((value(true, char('"')), value(false, char('\''))))(input)?; + + cut(cont(double_quote))(input) +} +fn string_raw(input: Input<'_>) -> Output<IStr> { + #[derive(Clone, Copy)] + enum StringPart<'i> { + Raw(&'i str), + Quote, + } + + let inner = |quote: &'static str, quotequote: &'static str| { + map( + fold_many0( + alt(( + map(is_not(quote), |v: Input<'_>| StringPart::Raw(v.to_str())), + value(StringPart::Quote, tag(quotequote)), + )), + String::new, + |mut acc, v| { + match v { + StringPart::Raw(s) => acc.push_str(s), + StringPart::Quote => acc.push_str(quote), + } + acc + }, + ), + IStr::from, + ) + }; + let cont = |double_quote: bool| { + terminated( + if double_quote { + inner("\"", "\"\"") + } else { + inner("'", "''") + }, + char(if double_quote { '"' } else { '\'' }), + ) + }; + + let (input, double_quote) = preceded( + char('@'), + cut(alt((value(true, char('"')), value(false, char('\''))))), + )(input)?; + + cut(cont(double_quote))(input) +} + +fn string(input: Input<'_>) -> Output<IStr> { + alt((string_quoted, string_raw, string_block))(input) +} + +fn field_name(input: Input<'_>) -> Output<FieldName> { + let dynamic = map(delimited(char('['), expr, char(']')), FieldName::Dyn); + let fixed = map(alt((string, id)), FieldName::Fixed); + + alt((fixed, dynamic))(input) +} + +fn visibility(input: Input<'_>) -> Output<Visibility> { + alt(( + value(Visibility::Unhide, tag(":::")), + value(Visibility::Hidden, tag("::")), + value(Visibility::Normal, tag(":")), + ))(input) +} +fn obj_field(input: Input<'_>) -> Output<FieldMember> { + #[derive(Debug)] + enum FieldKind { + Field { plus: bool }, + Method { params: ParamsDesc }, + } + impl FieldKind { + fn plus(&self) -> bool { + match self { + FieldKind::Field { plus } => *plus, + FieldKind::Method { .. } => false, + } + } + fn params(self) -> Option<ParamsDesc> { + match self { + FieldKind::Field { ..
} => None, + FieldKind::Method { params } => Some(params), + } + } + } + let field = map(opt(tag("+")), |v| FieldKind::Field { plus: v.is_some() }); + let method = map(params, |params| FieldKind::Method { params }); + + let kind = alt((field, method)); + + map( + tuple(( + n_ws(field_name), + cut(n_ws(kind)), + cut(n_ws(visibility)), + cut(expr), + )), + |(name, kind, visibility, value)| FieldMember { + name, + plus: kind.plus(), + params: kind.params(), + visibility, + value, + }, + )(input) +} +fn obj_local(input: Input) -> Output<BindSpec> { + let (input, _) = keyword("local")(input)?; + + cut(in_ws(bind))(input) +} +fn member(input: Input) -> Output<Member> { + alt(( + map(obj_field, Member::Field), + map(obj_local, Member::BindStmt), + map(assertion, Member::AssertStmt), + ))(input) +} +fn obj_body(input: Input) -> Output<ObjBody> { + let inner = |input| { + let (input, members) = separated_trailing0(comma, member)(input)?; + + let (input, compspecs) = opt(compspecs)(input)?; + + Ok(( + input, + if let Some(compspecs) = compspecs { + #[derive(Clone, Copy)] + enum State { + Pre, + Post, + } + let mut state = State::Pre; + let mut pre_locals = vec![]; + let mut post_locals = vec![]; + let mut field = None::<FieldMember>; + for member in members { + match (member, state) { + (Member::BindStmt(v), State::Pre) => pre_locals.push(v), + (Member::BindStmt(v), State::Post) => post_locals.push(v), + (Member::Field(v), State::Pre) => { + field = Some(v); + state = State::Post; + } + (Member::Field(_), State::Post) => { + // FIXME: only one field per objcomp + return Err(nom::Err::Failure(nom::error::make_error( + input, + ErrorKind::Many0, + ))); + } + (Member::AssertStmt(_), _) => { + // FIXME: asserts aren't supported in objcomp + return Err(nom::Err::Failure(nom::error::make_error( + input, + ErrorKind::Many0, + ))); + } + } + } + + ObjBody::ObjComp(ObjComp { pre_locals, - field, + field: field.ok_or_else(|| { + // FIXME: field is required + nom::Err::Failure(nom::error::make_error(input, ErrorKind::IsA)) + })?, post_locals, compspecs, }) - } - / members:(member(s) ** comma()) comma()? {expr::ObjBody::MemberList(members)} - pub rule ifspec(s: &ParserSettings) -> IfSpecData = keyword("if") _ expr:expr(s) {IfSpecData(expr)} - pub rule forspec(s: &ParserSettings) -> ForSpecData = keyword("for") _ id:destruct(s) _ keyword("in") _ cond:expr(s) {ForSpecData(id, cond)} - pub rule compspec(s: &ParserSettings) -> Vec<expr::CompSpec> = s:(i:ifspec(s) { expr::CompSpec::IfSpec(i) } / f:forspec(s) {expr::CompSpec::ForSpec(f)} ) ** _ {s} - pub rule local_expr(s: &ParserSettings) -> Expr = keyword("local") _ binds:bind(s) ** comma() (_ ",")? _ ";" _ expr:expr(s) { Expr::LocalExpr(binds, expr) } - pub rule string_expr(s: &ParserSettings) -> Expr = s:string() {Expr::Str(s.into())} - pub rule obj_expr(s: &ParserSettings) -> Expr = "{" _ body:objinside(s) _ "}" {Expr::Obj(body)} - pub rule array_expr(s: &ParserSettings) -> Expr = "[" _ elems:(expr(s) ** comma()) _ comma()? "]" {Expr::Arr(elems)} - pub rule array_comp_expr(s: &ParserSettings) -> Expr = "[" _ expr:expr(s) _ comma()? _ forspec:forspec(s) _ others:(others: compspec(s) _ {others})?
"]" { - let mut specs = vec![CompSpec::ForSpec(forspec)]; - specs.extend(others.unwrap_or_default()); - Expr::ArrComp(expr, specs) - } - pub rule number_expr(s: &ParserSettings) -> Expr = n:number() { expr::Expr::Num(n) } - pub rule var_expr(s: &ParserSettings) -> Expr = n:id() { expr::Expr::Var(n) } - pub rule id_loc(s: &ParserSettings) -> LocExpr = a:position!() n:id() b:position!() { LocExpr::new(expr::Expr::Str(n), Span(s.source.clone(), a as u32,b as u32)) } - pub rule if_then_else_expr(s: &ParserSettings) -> Expr = cond:ifspec(s) _ keyword("then") _ cond_then:expr(s) cond_else:(_ keyword("else") _ e:expr(s) {e})? {Expr::IfElse{ cond, cond_then, cond_else, }} - - pub rule literal(s: &ParserSettings) -> Expr = v:( keyword("null") {LiteralType::Null} / keyword("true") {LiteralType::True} / keyword("false") {LiteralType::False} / keyword("self") {LiteralType::This} / keyword("$") {LiteralType::Dollar} / keyword("super") {LiteralType::Super} ) {Expr::Literal(v)} - - pub rule expr_basic(s: &ParserSettings) -> Expr = literal(s) - / string_expr(s) / number_expr(s) - / array_expr(s) - / obj_expr(s) - / array_expr(s) - / array_comp_expr(s) - - / keyword("importstr") _ path:expr(s) {Expr::ImportStr(path)} - / keyword("importbin") _ path:expr(s) {Expr::ImportBin(path)} - / keyword("import") _ path:expr(s) {Expr::Import(path)} - - / var_expr(s) - / local_expr(s) - / if_then_else_expr(s) - - / keyword("function") _ "(" _ params:params(s) _ ")" _ expr:expr(s) {Expr::Function(params, expr)} - / assertion:assertion(s) _ ";" _ expr:expr(s) { Expr::AssertExpr(assertion, expr) } - - / keyword("error") _ expr:expr(s) { Expr::ErrorStmt(expr) } - - rule slice_part(s: &ParserSettings) -> Option<LocExpr> = _ e:(e:expr(s) _{e})? {e} - pub rule slice_desc(s: &ParserSettings) -> SliceDesc = start:slice_part(s) ":" pair:(end:slice_part(s) step:(":" e:slice_part(s){e})? {(end, step.flatten())})? { let (end, step) = if let Some((end, step)) = pair { (end, step) }else{ (None, None) }; SliceDesc { start, end, step } } + } else { + ObjBody::MemberList(members) + }, + )) + }; + delimited( + char('{'), + context("objinside", cut(in_ws(inner))), + context("object end", cut(char('}'))), + )(input) +} - rule binop(x: rule<()>) -> () = quiet!{ x() } / expected!("") - rule unaryop(x: rule<()>) -> () = quiet!{ x() } / expected!("") +fn compspecs(input: Input) -> Output<Vec<CompSpec>> { + let ifspec = map(preceded(keyword("if"), cut(expr)), |v| { + CompSpec::IfSpec(IfSpecData(v)) + }); + let forspec = map( + preceded( + keyword("for"), + cut(in_ws(separated_pair(destruct, in_ws(keyword("in")), expr))), + ), + |(dest, inv)| CompSpec::ForSpec(ForSpecData(dest, inv)), + ); - rule ensure_null_coaelse() = "" {? - #[cfg(not(feature = "exp-null-coaelse"))] return Err("!!!experimental null coaelscing was not enabled"); - #[cfg(feature = "exp-null-coaelse")] Ok(()) } - use BinaryOpType::*; - use UnaryOpType::*; - rule expr(s: &ParserSettings) -> LocExpr = precedence! { - start:position!() v:@ end:position!() { LocExpr::new(v, Span(s.source.clone(), start as u32, end as u32)) } - -- - a:(@) _ binop(<"||">) _ b:@ {expr_bin!(a Or b)} - a:(@) _ binop(<"??">) _ ensure_null_coaelse() b:@ { - #[cfg(feature = "exp-null-coaelse")] return expr_bin!(a NullCoaelse b); - unreachable!("ensure_null_coaelse will fail if feature is not enabled") + let spec = alt((forspec, ifspec)); + + // TODO: Ensure first spec is forspec?
+ fold_many1(spec, Vec::new, |mut acc: Vec<_>, v| { + acc.push(v); + acc + })(input) +} + +fn local_expr(input: Input) -> Output<Expr> { + let (input, _) = keyword("local")(input)?; + + map( + cut(in_ws(separated_pair( + separated_trailing0(comma, bind), + n_ws(char(';')), + dbg("local expr", expr), + ))), + |(binds, expr)| Expr::LocalExpr(binds, expr), + )(input) +} + +fn arr_expr(input: Input) -> Output<Expr> { + let inner = |input| { + let (input, elems) = separated_trailing0(comma, expr)(input)?; + let (input, specs) = opt(compspecs)(input)?; + + Ok(( + input, + if let Some(comp) = specs { + if elems.len() != 1 { + // FIXME: array forspec only supports one element + return Err(nom::Err::Failure(nom::error::make_error( + input, + ErrorKind::Many0, + ))); } - -- - a:(@) _ binop(<"&&">) _ b:@ {expr_bin!(a And b)} - -- - a:(@) _ binop(<"|">) _ b:@ {expr_bin!(a BitOr b)} - -- - a:@ _ binop(<"^">) _ b:(@) {expr_bin!(a BitXor b)} - -- - a:(@) _ binop(<"&">) _ b:@ {expr_bin!(a BitAnd b)} - -- - a:(@) _ binop(<"==">) _ b:@ {expr_bin!(a Eq b)} - a:(@) _ binop(<"!=">) _ b:@ {expr_bin!(a Neq b)} - -- - a:(@) _ binop(<"<">) _ b:@ {expr_bin!(a Lt b)} - a:(@) _ binop(<">">) _ b:@ {expr_bin!(a Gt b)} - a:(@) _ binop(<"<=">) _ b:@ {expr_bin!(a Lte b)} - a:(@) _ binop(<">=">) _ b:@ {expr_bin!(a Gte b)} - a:(@) _ binop(<keyword("in")>) _ b:@ {expr_bin!(a In b)} - -- - a:(@) _ binop(<"<<">) _ b:@ {expr_bin!(a Lhs b)} - a:(@) _ binop(<">>">) _ b:@ {expr_bin!(a Rhs b)} - -- - a:(@) _ binop(<"+">) _ b:@ {expr_bin!(a Add b)} - a:(@) _ binop(<"-">) _ b:@ {expr_bin!(a Sub b)} - -- - a:(@) _ binop(<"*">) _ b:@ {expr_bin!(a Mul b)} - a:(@) _ binop(<"/">) _ b:@ {expr_bin!(a Div b)} - a:(@) _ binop(<"%">) _ b:@ {expr_bin!(a Mod b)} - -- - unaryop(<"+">) _ b:@ {expr_un!(Plus b)} - unaryop(<"-">) _ b:@ {expr_un!(Minus b)} - unaryop(<"!">) _ b:@ {expr_un!(Not b)} - unaryop(<"~">) _ b:@ {expr_un!(BitNot b)} - -- - a:(@) _ "[" _ e:slice_desc(s) _ "]" {Expr::Slice(a, e)} - indexable:(@) _ parts:index_part(s)+ {Expr::Index{indexable, parts}} - a:(@) _ "(" _ args:args(s) _ ")" ts:(_ keyword("tailstrict"))? {Expr::Apply(a, args, ts.is_some())} - a:(@) _ "{" _ body:objinside(s) _ "}" {Expr::ObjExtend(a, body)} - -- - e:expr_basic(s) {e} - "(" _ e:expr(s) _ ")" {Expr::Parened(e)} + let elem = elems.into_iter().next().expect("len == 1"); + Expr::ArrComp(elem, comp) + } else { + Expr::Arr(elems) + }, + )) + }; + delimited(char('['), cut(inner), cut(char(']')))(input) +} +fn if_then_else_expr(input: Input) -> Output<Expr> { + let (input, _) = keyword("if")(input)?; + + map( + cut(tuple(( + expr, + preceded(keyword("then"), expr), + opt(preceded(keyword("else"), expr)), + ))), + |(cond, cond_then, cond_else)| Expr::IfElse { + cond: IfSpecData(cond), + cond_then, + cond_else, + }, + )(input) +} + +fn literal_expr(input: Input) -> Output<Expr> { + let literal = alt(( + value(LiteralType::Null, keyword("null")), + value(LiteralType::True, keyword("true")), + value(LiteralType::False, keyword("false")), + value(LiteralType::This, keyword("self")), + value(LiteralType::Dollar, keyword("$")), + value(LiteralType::Super, keyword("super")), + )); + + map(literal, Expr::Literal)(input) +} + +fn import_expr(input: Input) -> Output<Expr> { + // TODO: Parser should have this field in Import expr instead of 3 different expr kinds.
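// A hypothetical shape for the TODO above (a sketch only, not implemented in
// this change): fold the three import variants into a single one that carries
// its kind,
//
//     Import(ImportKind, LocExpr)
//
// so that later stages match on the stored ImportKind instead of on three
// separate Expr variants.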
+ #[derive(Clone, Copy)] + enum ImportKind { + Normal, + String, + Binary, + } + let (input, kind) = alt(( + value(ImportKind::Normal, keyword("import")), + value(ImportKind::String, keyword("importstr")), + value(ImportKind::Binary, keyword("importbin")), + ))(input)?; + + let (input, expr) = cut(expr)(input)?; + + // TODO: Should expr type be checked here? (Only Str is allowed as an import operand, yet the parser outputs any Expr) + + Ok(( + input, + match kind { + ImportKind::Normal => Expr::Import(expr), + ImportKind::String => Expr::ImportStr(expr), + ImportKind::Binary => Expr::ImportBin(expr), + }, + )) +} +fn function_expr(input: Input) -> Output<Expr> { + let (input, _) = keyword("function")(input)?; + + map(cut(tuple((params, expr))), |(params, value)| { + Expr::Function(params, value) + })(input) +} +fn assert_expr(input: Input) -> Output<Expr> { + map( + separated_pair(assertion, cut(char(';')), cut(expr)), + |(ass, v)| Expr::AssertExpr(ass, v), + )(input) +} + +#[cfg(feature = "exp-null-coaelse")] +fn index_part(input: Input) -> Output<IndexPart> { + let (input, null_coaelse) = map(opt(value(true, char('?'))), |v| v.unwrap_or_default())(input)?; + + if null_coaelse { + let inner = |input| { + let (input, _) = char('.')(input)?; + + let (input, value) = alt(( + spanned(map(id, Expr::Str)), + map(delimited(char('['), expr, char(']')), |e| e), + ))(input)?; + + Ok((input, IndexPart { + value, + null_coaelse: true, + })) - pub rule index_part(s: &ParserSettings) -> IndexPart = n:("?" _ ensure_null_coaelse())? "." _ value:id_loc(s) {IndexPart { - value, - #[cfg(feature = "exp-null-coaelse")] - null_coaelse: n.is_some(), - }} - / n:("?" _ "." _ ensure_null_coaelse())? "[" _ value:expr(s) _ "]" {IndexPart { + }; + + cut(inner)(input) + } else { + let (input, _) = char('.')(input)?; + map(cut(spanned(map(id, Expr::Str))), |value| IndexPart { value, - #[cfg(feature = "exp-null-coaelse")] - null_coaelse: n.is_some(), - }} + null_coaelse: false, + })(input) + } +} +#[cfg(not(feature = "exp-null-coaelse"))] +fn index_part(input: Input) -> Output<IndexPart> { + let (input, _) = char('.')(input)?; + map(cut(spanned(map(id, Expr::Str))), |value| IndexPart { + value, + })(input) +} + +#[derive(Debug, Trace)] +enum Suffix { + Args(ArgsDesc, bool), + SliceOrIndex(SliceOrIndex), + Index(Vec<IndexPart>), +} + +fn unary_op(input: Input) -> Output<UnaryOpType> { + let op = |ty: UnaryOpType| value(ty, tag(ty.name())); + alt(( + op(UnaryOpType::Not), + op(UnaryOpType::Plus), + op(UnaryOpType::Minus), + op(UnaryOpType::BitNot), + ))(input) +} + +fn suffix(input: Input) -> Output<Spanned<Suffix>> { + spanned_any(alt(( + // TODO: move tailstrict to argsdesc?
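// The three suffix forms recognized below, in jsonnet surface syntax:
// `f(x, y) tailstrict` becomes Suffix::Args, `a[0]` / `a[1:10:2]` becomes
// Suffix::SliceOrIndex, and `a.b.c` becomes Suffix::Index (a run of `.ident`
// parts).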
+ map(tuple((args, opt(keyword("tailstrict")))), |(args, ts)| { + Suffix::Args(args, ts.is_some()) + }), + map(slice_or_index, Suffix::SliceOrIndex), + map(many1(index_part), Suffix::Index), + )))(input) +} - pub rule jsonnet(s: &ParserSettings) -> LocExpr = _ e:expr(s) _ {e} } } -pub type ParseError = peg::error::ParseError<peg::str::LineCol>; +fn dbg<'i, T: fmt::Debug>( + ctx: &'static str, + mut handle: impl FnMut(Input<'i>) -> Output<'i, T>, +) -> impl FnMut(Input<'i>) -> Output<'i, T> { + move |input| { + eprintln!("entered {ctx}: {input:?}"); + let value = handle(input); + eprintln!("exited {ctx}: {value:?}"); + value } } +fn spanned<'i>( + mut inner: impl FnMut(Input<'i>) -> Output<'i, Expr>, +) -> impl FnMut(Input<'i>) -> Output<'i, LocExpr> { + move |input| { + let start = input.start; + let (input, value) = inner(input)?; + Ok(( + input, + LocExpr::new(value, Span(current_source(), start, input.start)), + )) + } +} +fn spanned_any<'i, T: Trace>( + mut inner: impl FnMut(Input<'i>) -> Output<'i, T>, +) -> impl FnMut(Input<'i>) -> Output<'i, Spanned<T>> { + move |input| { + let start = input.start; + let (input, value) = inner(input)?; + Ok(( + input, + Spanned(value, Span(current_source(), start, input.start)), + )) + } +} + +fn lhs_unary_op(input: Input<'_>) -> Output<LocExpr> { + let start = input.start; + let (input, un) = unary_op(input)?; + + let (_, right_binding_power) = un.binding_power(); + + let (input, (expr, end)) = cut(expr_binding_power(right_binding_power))(input)?; + + Ok(( + input, + LocExpr::new(Expr::UnaryOp(un, expr), Span(current_source(), start, end)), + )) +} + +fn lhs_basic(input: Input<'_>) -> Output<LocExpr> { + alt(( + delimited(char('('), cut(expr), cut(char(')'))), + // 2. Numbers are parsed before the unary op, because I want -1 to be parsed as Num(-1), not as UnaryOp(Minus, Num(1)) + spanned(map(number, Expr::Num)), + // 1. It needs to be separated, as inner expr_binding_power consumes whitespace unnecessarily, and expression end needs to be recovered.
+ lhs_unary_op, + spanned(alt(( + literal_expr, + map(string, Expr::Str), + arr_expr, + map(obj_body, Expr::Obj), + import_expr, + map(id, Expr::Var), + local_expr, + if_then_else_expr, + function_expr, + assert_expr, + map(preceded(keyword("error"), cut(expr)), Expr::ErrorStmt), + ))), + ))(input) +} +fn lhs(input: Input<'_>) -> Output<LocExpr> { + let (input, mut out) = lhs_basic(input)?; + + let mut suffixes = iterator(input, suffix); + + for Spanned(suffix, span) in suffixes.into_iter() { + out = LocExpr::new( + match suffix { + Suffix::Args(a, tailstrict) => Expr::Apply(out, a, tailstrict), + Suffix::SliceOrIndex(slice) => match slice { + SliceOrIndex::Index(i) => Expr::Index { + indexable: out, + parts: vec![IndexPart { + value: i, + #[cfg(feature = "exp-null-coaelse")] + null_coaelse: false, + }], + }, + SliceOrIndex::Slice(s) => Expr::Slice(out, s), + }, + Suffix::Index(parts) => Expr::Index { + indexable: out, + parts, + }, + }, + span, + ) + } + + let input = match suffixes.finish() { + Ok((input, ())) => input, + // Recover + Err(nom::Err::Error(nom::error::Error { input, code: _ })) => input, + Err(e) => return Err(e), + }; + Ok((input, out)) +} + +fn operator(input: Input) -> Output<BinaryOpType> { + let op = |ty: BinaryOpType| value(ty, tag(ty.name())); + // A better form of operator matching should be used here + alt(( + value(BinaryOpType::ObjectApply, peek(tag("{"))), + op(BinaryOpType::Mul), + op(BinaryOpType::Div), + op(BinaryOpType::Mod), + op(BinaryOpType::Add), + op(BinaryOpType::Sub), + op(BinaryOpType::Lhs), + op(BinaryOpType::Rhs), + // Lte/Gte go first: Lt/Gt are their prefixes + op(BinaryOpType::Lte), + op(BinaryOpType::Gte), + op(BinaryOpType::Lt), + op(BinaryOpType::Gt), + value(BinaryOpType::In, keyword("in")), + op(BinaryOpType::Eq), + op(BinaryOpType::Neq), + // And/Or go first: BitAnd/BitOr are their prefixes + op(BinaryOpType::And), + op(BinaryOpType::Or), + op(BinaryOpType::BitAnd), + op(BinaryOpType::BitOr), + op(BinaryOpType::BitXor), + #[cfg(feature = "exp-null-coaelse")] + op(BinaryOpType::NullCoalesce), + ))(input) +} + +/// As this parser consumes whitespace after the LHS, we need to account for the bytes consumed; +/// to do that, the parser returns the end of the expression as the second tuple value. +fn expr_binding_power( + minimum_binding_power: u8, +) -> impl FnMut(Input<'_>) -> Output<(LocExpr, u32)> { + move |input| { + let start = input.start; + let (input, mut lhs) = lhs(input)?; + let mut end = input.start; + let (mut input, _) = ws(input)?; + + // TODO: use fold1? + while let (input_, Some(op)) = peek(opt(operator))(input)? { + input = input_; + let (left_binding_power, right_binding_power) = op.binding_power(); + + if left_binding_power < minimum_binding_power { + break; + } + + // Maybe `operator` combinator should also handle binding power?
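// Worked example for `2 + 2 * 2` (cf. the basic_math tests below), assuming
// `*` has a higher binding power than `+`: the outer call (minimum 0) parses
// lhs `2` and peeks `+`; it recurses with `+`'s right binding power, and the
// inner call parses `2`, peeks `*`, and folds `2 * 2` itself because `*`'s
// left binding power clears the raised minimum. The outer loop then receives
// BinaryOp(2, Mul, 2) as rhs and produces BinaryOp(2, Add, BinaryOp(2, Mul, 2)).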
+ let (input_, op2) = n_ws(operator)(input)?; + input = input_; + debug_assert_eq!(op, op2, "first time we peeked, then we popped"); + + let (input_, (rhs, end_)) = cut(expr_binding_power(right_binding_power))(input)?; + input = input_; + end = end_; + + lhs = LocExpr::new( + Expr::BinaryOp(lhs, op, rhs), + Span(current_source(), start, end), + ); + } + + Ok((input, (lhs, end))) + } +} + +#[derive(Debug, Trace)] +enum SliceOrIndex { + Index(LocExpr), + Slice(SliceDesc), +} + +fn slice_or_index(input: Input) -> Output<SliceOrIndex> { + let inner = |input| { + let (input, start) = opt(expr)(input)?; + + let (input, start_del) = opt(char(':'))(input)?; + + if start_del.is_some() { + let (input, (end, step)) = + tuple((opt(expr), opt(preceded(char(':'), opt(expr)))))(input)?; + + let step = step.flatten(); + + Ok((input, SliceOrIndex::Slice(SliceDesc { start, end, step }))) + } else { + Ok(( + input, + SliceOrIndex::Index(start.ok_or_else(|| { + // FIXME: missing expression + nom::Err::Failure(nom::error::make_error(input, ErrorKind::Tag)) + })?), + )) + } + }; + delimited(char('['), cut(inner), cut(char(']')))(input) +} + +pub type ParseError = nom::Err<nom::error::Error<String>>; pub fn parse(str: &str, settings: &ParserSettings) -> Result<LocExpr, ParseError> { - jsonnet_parser::jsonnet(str, settings) + with_current_source(settings.source.clone(), || { + let (input, out) = match in_ws(expr)(Input::new(str)) { + Ok(v) => v, + Err(e) => { + panic!("failed: {e:#}"); + } + }; + assert_eq!(input.as_str(), "", "some input was not eaten"); + Ok(out) + }) } /// Used for importstr values pub fn string_to_expr(str: IStr, settings: &ParserSettings) -> LocExpr { @@ -405,6 +1314,18 @@ pub mod tests { ) }; } + macro_rules! sp { + ($expr:expr, $from:expr, $to:expr$(,)?) => { + Spanned( + $expr, + Span( + Source::new_virtual("".into(), IStr::empty()), + $from, + $to, + ), + ) + }; + } #[test] fn multiline_string() { @@ -519,7 +1440,7 @@ pub mod tests { #[test] fn basic_math_with_indents() { assert_eq!( - parse!("2 + 2 * 2 "), + parse!("2 + 2 * 2 "), el!( Expr::BinaryOp( el!(Expr::Num(2.0), 0, 1), @@ -545,25 +1466,21 @@ el!(Expr::Num(2.0), 0, 1), Add, el!( - Expr::Parened(el!( - Expr::BinaryOp( - el!(Expr::Num(2.0), 3, 4), - Add, - el!( - Expr::BinaryOp( - el!(Expr::Num(2.0), 5, 6), - Mul, - el!(Expr::Num(2.0), 7, 8), - ), - 5, - 8 + Expr::BinaryOp( + el!(Expr::Num(2.0), 3, 4), + Add, + el!( + Expr::BinaryOp( + el!(Expr::Num(2.0), 5, 6), + Mul, + el!(Expr::Num(2.0), 7, 8), + ), + 5, + 8 ), - ), - 3, - 8 - )), - 2, - 9 + ), + 3, + 8 ), ), 0, @@ -597,18 +1514,19 @@ ); } - /// Comments should be able to be escaped - #[test] - fn comment_escaping() { - assert_eq!( - parse!("2/*\\*/+*/ - 22"), - el!( - Expr::BinaryOp(el!(Expr::Num(2.0), 0, 1), Sub, el!(Expr::Num(22.0), 12, 14)), - 0, - 14 - ) - ); - } + /// Comments should be able to be escaped (This behavior is not present in upstream jsonnet; I have no idea why I had + /// implemented it, and it is too ugly to be used) + // #[test] + // fn comment_escaping() { + // assert_eq!( + // parse!("2/*\\*/+*/ - 22"), + // el!( + // Expr::BinaryOp(el!(Expr::Num(2.0), 0, 1), Sub, el!(Expr::Num(22.0), 12, 14)), + // 0, + // 14 + // ) + // ); + // } #[test] fn suffix() { @@ -640,17 +1558,17 @@ null_coaelse: false, }], }, - 1, + 4, 13 ), ArgsDesc::new(vec![el!(Var("x".into()), 14, 15)], vec![]), false, ), - 1, + 13, 16 ), vec![CompSpec::ForSpec(ForSpecData( - Destruct::Full("x".into()), + Destruct::Full(sp!("x".into(), 21, 22)), el!(Var("arr".into()), 26, 29) ))] ), @@ -745,33 +1663,31 @@ pub
mod tests { #[test] fn add_location_info_to_all_sub_expressions() { use Expr::*; - - let file_name = Source::new_virtual("".into(), IStr::empty()); - let expr = parse( - "{} { local x = 1, x: x } + {}", - &ParserSettings { source: file_name }, - ) - .unwrap(); assert_eq!( - expr, + parse!("{} { local x = 1, x: x } + {}"), el!( BinaryOp( el!( - ObjExtend( + BinaryOp( el!(Obj(ObjBody::MemberList(vec![])), 0, 2), - ObjBody::MemberList(vec![ - Member::BindStmt(BindSpec::Field { - into: Destruct::Full("x".into()), - value: el!(Num(1.0), 15, 16) - }), - Member::Field(FieldMember { - name: FieldName::Fixed("x".into()), - plus: false, - params: None, - visibility: Visibility::Normal, - value: el!(Var("x".into()), 21, 22), - }) - ]) + BinaryOpType::ObjectApply, + el!( + Obj(ObjBody::MemberList(vec![ + Member::BindStmt(BindSpec::Field { + into: Destruct::Full(sp!("x".into(), 11, 12)), + value: el!(Num(1.0), 15, 16) + }), + Member::Field(FieldMember { + name: FieldName::Fixed("x".into()), + plus: false, + params: None, + visibility: Visibility::Normal, + value: el!(Var("x".into()), 21, 22), + }) + ])), + 3, + 24 + ), ), 0, 24 @@ -784,4 +1700,10 @@ pub mod tests { ), ); } + #[test] + fn num() { + use Expr::*; + assert_eq!(parse!("-1"), el!(Num(-1.0,), 0, 2)); + assert_eq!(parse!("-1_0"), el!(Num(-10.0,), 0, 4)); + } } diff --git a/crates/jrsonnet-parser/src/source.rs b/crates/jrsonnet-parser/src/source.rs index cb54de48..b8545954 100644 --- a/crates/jrsonnet-parser/src/source.rs +++ b/crates/jrsonnet-parser/src/source.rs @@ -79,8 +79,13 @@ any_ext!(SourcePathT); /// search location is applicable /// /// Resolver may also return custom implementations of this trait, for example it may return http url in case of remotely loaded files -#[derive(Eq, Debug, Clone)] +#[derive(Eq, Clone)] pub struct SourcePath(Rc); +impl Debug for SourcePath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self.0) + } +} impl SourcePath { pub fn new(inner: impl SourcePathT) -> Self { Self(Rc::new(inner)) @@ -208,8 +213,14 @@ impl SourcePathT for SourceDirectory { /// /// It is used for --ext-code=.../--tla-code=.../standard library source code by default, /// and user can construct arbitrary values by hand, without asking import resolver -#[derive(Trace, Hash, PartialEq, Eq, Debug, Clone)] +#[derive(Trace, Hash, PartialEq, Eq, Clone)] pub struct SourceVirtual(pub IStr); + +impl Debug for SourceVirtual { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} impl Display for SourceVirtual { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.0) @@ -258,9 +269,15 @@ impl SourcePathT for SourceFifo { /// Either real file, or virtual /// Hash of FileName always have same value as raw Path, to make it possible to use with raw_entry_mut -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq)] pub struct Source(pub Rc<(SourcePath, IStr)>); +impl Debug for Source { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self.0 .0) + } +} + impl Trace for Source { fn trace(&self, _tracer: &mut Tracer) {} diff --git a/crates/jrsonnet-parser/src/unescape.rs b/crates/jrsonnet-parser/src/unescape.rs deleted file mode 100644 index d8624329..00000000 --- a/crates/jrsonnet-parser/src/unescape.rs +++ /dev/null @@ -1,55 +0,0 @@ -use std::str::Chars; - -fn decode_unicode(chars: &mut Chars) -> Option { - IntoIterator::into_iter([chars.next()?, chars.next()?, chars.next()?, chars.next()?]) - .map(|c| 
c.to_digit(16).map(|f| f as u16)) - .try_fold(0u16, |acc, v| Some((acc << 4) | (v?))) -} - -pub fn unescape(s: &str) -> Option { - let mut chars = s.chars(); - let mut out = String::with_capacity(s.len()); - - while let Some(c) = chars.next() { - if c != '\\' { - out.push(c); - continue; - } - match chars.next()? { - c @ ('\\' | '"' | '\'') => out.push(c), - 'b' => out.push('\u{0008}'), - 'f' => out.push('\u{000c}'), - 'n' => out.push('\n'), - 'r' => out.push('\r'), - 't' => out.push('\t'), - 'u' => match decode_unicode(&mut chars)? { - // May only be second byte - 0xDC00..=0xDFFF => return None, - // Surrogate pair - n1 @ 0xD800..=0xDBFF => { - if chars.next() != Some('\\') { - return None; - } - if chars.next() != Some('u') { - return None; - } - let n2 = decode_unicode(&mut chars)?; - if !matches!(n2, 0xDC00..=0xDFFF) { - return None; - } - let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; - out.push(char::from_u32(n)?); - } - n => out.push(char::from_u32(n as u32)?), - }, - 'x' => { - let c = IntoIterator::into_iter([chars.next()?, chars.next()?]) - .map(|c| c.to_digit(16)) - .try_fold(0u32, |acc, v| Some((acc << 8) | (v?)))?; - out.push(char::from_u32(c)?) - } - _ => return None, - } - } - Some(out) -} diff --git a/crates/jrsonnet-pkg/Cargo.toml b/crates/jrsonnet-pkg/Cargo.toml new file mode 100644 index 00000000..ec79c77e --- /dev/null +++ b/crates/jrsonnet-pkg/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "jrsonnet-pkg" +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +version.workspace = true + +[dependencies] + +[lints] +workspace = true diff --git a/crates/jrsonnet-pkg/src/lib.rs b/crates/jrsonnet-pkg/src/lib.rs new file mode 100644 index 00000000..06d268d0 --- /dev/null +++ b/crates/jrsonnet-pkg/src/lib.rs @@ -0,0 +1,14 @@ +pub fn add(left: usize, right: usize) -> usize { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} diff --git a/crates/jrsonnet-rowan-parser/src/generated/mod.rs b/crates/jrsonnet-rowan-parser/src/generated/mod.rs index 3e46ca18..83cf22fb 100644 --- a/crates/jrsonnet-rowan-parser/src/generated/mod.rs +++ b/crates/jrsonnet-rowan-parser/src/generated/mod.rs @@ -1,2 +1,2 @@ pub mod nodes; -pub mod syntax_kinds; +// pub mod syntax_kinds; diff --git a/crates/jrsonnet-rowan-parser/src/lex.rs b/crates/jrsonnet-rowan-parser/src/lex.rs index 77961a2c..8b137891 100644 --- a/crates/jrsonnet-rowan-parser/src/lex.rs +++ b/crates/jrsonnet-rowan-parser/src/lex.rs @@ -1,81 +1 @@ -use core::ops::Range; -use std::convert::TryFrom; -use logos::Logos; -use rowan::{TextRange, TextSize}; - -use crate::{ - string_block::{lex_str_block, StringBlockError}, - SyntaxKind, -}; - -pub struct Lexer<'a> { - inner: logos::Lexer<'a, SyntaxKind>, -} - -impl<'a> Lexer<'a> { - pub fn new(input: &'a str) -> Self { - Self { - inner: SyntaxKind::lexer(input), - } - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Lexeme<'a>; - - fn next(&mut self) -> Option { - use SyntaxKind::*; - - let mut kind = self.inner.next()?; - let text = self.inner.slice(); - - if kind == Ok(STRING_BLOCK) { - // We use custom lexer, which skips enough bytes, but not returns error - // Instead we should call lexer again to verify if there is something wrong with string block - let mut lexer = logos::Lexer::::new(text); - // In kinds, string blocks is parsed at least as `|||` - lexer.bump(3); - let res = lex_str_block(&mut lexer); - 
let next = lexer.next(); - assert!(next.is_none(), "str_block is lexed"); - match res { - Ok(()) => {} - Err(e) => { - kind = Ok(match e { - StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END, - StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE, - StringBlockError::MissingTermination => { - ERROR_STRING_BLOCK_MISSING_TERMINATION - } - StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT, - }); - } - } - } - - Some(Self::Item { - kind: kind.unwrap_or(SyntaxKind::LEXING_ERROR), - text, - range: { - let Range { start, end } = self.inner.span(); - - TextRange::new( - TextSize::try_from(start).unwrap(), - TextSize::try_from(end).unwrap(), - ) - }, - }) - } -} - -#[derive(Clone, Copy, Debug)] -pub struct Lexeme<'i> { - pub kind: SyntaxKind, - pub text: &'i str, - pub range: TextRange, -} - -pub fn lex(input: &str) -> Vec> { - Lexer::new(input).collect() -} diff --git a/crates/jrsonnet-rowan-parser/src/lib.rs b/crates/jrsonnet-rowan-parser/src/lib.rs index b1bceef6..944de3bc 100644 --- a/crates/jrsonnet-rowan-parser/src/lib.rs +++ b/crates/jrsonnet-rowan-parser/src/lib.rs @@ -14,7 +14,7 @@ mod lex; mod marker; mod parser; mod precedence; -mod string_block; +// mod string_block; mod tests; mod token_set; diff --git a/crates/jrsonnet-tokenizer/Cargo.toml b/crates/jrsonnet-tokenizer/Cargo.toml new file mode 100644 index 00000000..0bce1af3 --- /dev/null +++ b/crates/jrsonnet-tokenizer/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "jrsonnet-tokenizer" +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +version.workspace = true + +[dependencies] +logos.workspace = true +nom.workspace = true + +[lints] +workspace = true diff --git a/crates/jrsonnet-tokenizer/src/lexer.rs b/crates/jrsonnet-tokenizer/src/lexer.rs new file mode 100644 index 00000000..77af4497 --- /dev/null +++ b/crates/jrsonnet-tokenizer/src/lexer.rs @@ -0,0 +1,132 @@ +use core::ops::Range; +use std::{iter::Enumerate, marker::PhantomData, ops::RangeInclusive}; + +use logos::{Logos, Span}; +use nom::{IResult, InputIter, InputTake, Needed}; + +// use rowan::{TextRange, TextSize}; +use crate::{ + string_block::{lex_str_block, StringBlockError}, + TokenKind::{self, *}, +}; + +#[derive(Clone)] +pub struct Lexer<'a> { + inner: logos::Lexer<'a, TokenKind>, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + Self { + inner: TokenKind::lexer(input), + } + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Lexeme<'a>; + + fn next(&mut self) -> Option { + use TokenKind::*; + + let mut kind = self.inner.next()?; + let text = self.inner.slice(); + + if kind == Ok(STRING_BLOCK) { + // We use custom lexer, which skips enough bytes, but not returns error + // Instead we should call lexer again to verify if there is something wrong with string block + let mut lexer = logos::Lexer::::new(text); + // In kinds, string blocks is parsed at least as `|||` + lexer.bump(3); + let res = lex_str_block(&mut lexer); + let next = lexer.next(); + assert!(next.is_none(), "str_block is lexed"); + match res { + Ok(()) => {} + Err(e) => { + kind = Ok(match e { + StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END, + StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE, + StringBlockError::MissingTermination => { + ERROR_STRING_BLOCK_MISSING_TERMINATION + } + StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT, + }); + } + } + } + + Some(Self::Item { + kind: 
kind.unwrap_or(TokenKind::LEXING_ERROR), + text, + range: self.inner.span(), + }) + } +} + +#[derive(Clone, Debug)] +pub struct Lexeme<'i> { + pub kind: TokenKind, + pub text: &'i str, + pub range: Span, +} + +pub fn lex(input: &str) -> Vec> { + Lexer::new(input).collect() +} + +// impl<'i> InputIter for Lexer<'i> { +// type Item = Lexeme<'i>; +// +// type Iter = Enumerate; +// +// type IterElem = Self; +// +// fn iter_indices(&self) -> Self::Iter { +// self.clone().enumerate() +// } +// +// fn iter_elements(&self) -> Self::IterElem { +// self.clone() +// } +// +// fn position

(&self, predicate: P) -> Option +// where +// P: Fn(Self::Item) -> bool, +// { +// for (o, c) in self.iter_indices() { +// if predicate(c) { +// return Some(o); +// } +// } +// None +// } +// +// fn slice_index(&self, count: usize) -> Result { +// let mut cnt = 0; +// let mut last_end = 0; +// for (index, e) in self.iter_indices() { +// if cnt == count { +// return Ok(index); +// } +// cnt += 1; +// last_end = e.range.end; +// } +// if cnt == count { +// return Ok(last_end); +// } +// Err(Needed::Unknown) +// } +// } +// impl InputTake for Lexer<'i> { +// fn take(&self, count: usize) -> Self { +// let lex = self.inner.clone(); +// lex. +// } +// +// fn take_split(&self, count: usize) -> (Self, Self) { +// todo!() +// } +// } +// +// fn parse_tok(i: Lexer<'_>) -> IResult, ()> {} diff --git a/crates/jrsonnet-tokenizer/src/lib.rs b/crates/jrsonnet-tokenizer/src/lib.rs new file mode 100644 index 00000000..3d577ff5 --- /dev/null +++ b/crates/jrsonnet-tokenizer/src/lib.rs @@ -0,0 +1,5 @@ +mod lexer; +mod string_block; +mod syntax_kinds; +pub use lexer::Lexeme; +pub(crate) use syntax_kinds::TokenKind; diff --git a/crates/jrsonnet-rowan-parser/src/string_block.rs b/crates/jrsonnet-tokenizer/src/string_block.rs similarity index 94% rename from crates/jrsonnet-rowan-parser/src/string_block.rs rename to crates/jrsonnet-tokenizer/src/string_block.rs index db7d55e5..5cdf7ce7 100644 --- a/crates/jrsonnet-rowan-parser/src/string_block.rs +++ b/crates/jrsonnet-tokenizer/src/string_block.rs @@ -11,14 +11,14 @@ use std::ops::Range; use logos::Lexer; use StringBlockError::*; -use crate::SyntaxKind; +use crate::TokenKind; -pub fn lex_str_block_test(lex: &mut Lexer) { +pub fn lex_str_block_test(lex: &mut Lexer) { let _ = lex_str_block(lex); } #[allow(clippy::too_many_lines)] -pub fn lex_str_block(lex: &mut Lexer) -> Result<(), StringBlockError> { +pub fn lex_str_block(lex: &mut Lexer) -> Result<(), StringBlockError> { struct Context<'a> { source: &'a str, index: usize, @@ -118,7 +118,7 @@ pub fn lex_str_block(lex: &mut Lexer) -> Result<(), StringBlockError a.len() } - fn guess_token_end_and_bump<'a>(lex: &mut Lexer<'a, SyntaxKind>, ctx: &Context<'a>) { + fn guess_token_end_and_bump<'a>(lex: &mut Lexer<'a, TokenKind>, ctx: &Context<'a>) { let end_index = ctx .rest() .find("|||") diff --git a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs b/crates/jrsonnet-tokenizer/src/syntax_kinds.rs similarity index 81% rename from crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs rename to crates/jrsonnet-tokenizer/src/syntax_kinds.rs index 156bc9c8..32c6db47 100644 --- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs +++ b/crates/jrsonnet-tokenizer/src/syntax_kinds.rs @@ -12,7 +12,7 @@ use logos::Logos; #[doc = r" The kind of syntax node, e.g. 
`IDENT`, `USE_KW`, or `STRUCT`."] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)] #[repr(u16)] -pub enum SyntaxKind { +pub enum TokenKind { #[doc(hidden)] TOMBSTONE, #[doc(hidden)] @@ -159,8 +159,6 @@ pub enum SyntaxKind { ERROR_KW, #[token("in")] IN_KW, - META_OBJECT_APPLY, - ERROR_NO_OPERATOR, #[token("null")] NULL_KW, #[token("true")] @@ -175,90 +173,11 @@ pub enum SyntaxKind { FOR_KW, #[token("assert")] ASSERT_KW, - ERROR_MISSING_TOKEN, - ERROR_UNEXPECTED_TOKEN, - ERROR_CUSTOM, LEXING_ERROR, __LAST_TOKEN, - SOURCE_FILE, - EXPR, - SUFFIX_INDEX, - NAME, - SUFFIX_INDEX_EXPR, - SUFFIX_SLICE, - SLICE_DESC, - SUFFIX_APPLY, - ARGS_DESC, - STMT_LOCAL, - STMT_ASSERT, - ASSERTION, - EXPR_BINARY, - EXPR_UNARY, - EXPR_OBJ_EXTEND, - EXPR_PARENED, - EXPR_LITERAL, - EXPR_STRING, - EXPR_NUMBER, - EXPR_ARRAY, - EXPR_OBJECT, - EXPR_ARRAY_COMP, - EXPR_IMPORT, - EXPR_VAR, - EXPR_IF_THEN_ELSE, - TRUE_EXPR, - FALSE_EXPR, - EXPR_FUNCTION, - PARAMS_DESC, - EXPR_ERROR, - SLICE_DESC_END, - SLICE_DESC_STEP, - ARG, - OBJ_BODY_COMP, - OBJ_BODY_MEMBER_LIST, - MEMBER_BIND_STMT, - OBJ_LOCAL, - MEMBER_ASSERT_STMT, - MEMBER_FIELD_NORMAL, - MEMBER_FIELD_METHOD, - FIELD_NAME_FIXED, - FIELD_NAME_DYNAMIC, - FOR_SPEC, - IF_SPEC, - BIND_DESTRUCT, - BIND_FUNCTION, - PARAM, - DESTRUCT_FULL, - DESTRUCT_SKIP, - DESTRUCT_ARRAY, - DESTRUCT_OBJECT, - DESTRUCT_OBJECT_FIELD, - DESTRUCT_REST, - DESTRUCT_ARRAY_ELEMENT, - SUFFIX, - BIND, - STMT, - OBJ_BODY, - COMP_SPEC, - EXPR_BASE, - MEMBER_COMP, - MEMBER, - FIELD_NAME, - DESTRUCT, - DESTRUCT_ARRAY_PART, - BINARY_OPERATOR, - UNARY_OPERATOR, - LITERAL, - TEXT, - NUMBER, - IMPORT_KIND, - VISIBILITY, - TRIVIA, - CUSTOM_ERROR, - #[doc(hidden)] - __LAST, } -use self::SyntaxKind::*; -impl SyntaxKind { +use self::TokenKind::*; +impl TokenKind { pub fn is_keyword(self) -> bool { match self { OR | NULL_COAELSE | AND | BIT_OR | BIT_XOR | BIT_AND | EQ | NE | LT | GT | LE | GE @@ -271,16 +190,8 @@ impl SyntaxKind { _ => false, } } - pub fn is_enum(self) -> bool { - match self { - SUFFIX | BIND | STMT | OBJ_BODY | COMP_SPEC | EXPR_BASE | MEMBER_COMP | MEMBER - | FIELD_NAME | DESTRUCT | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR - | LITERAL | TEXT | NUMBER | IMPORT_KIND | VISIBILITY | TRIVIA | CUSTOM_ERROR => true, - _ => false, - } - } pub fn from_raw(r: u16) -> Self { - assert!(r < Self::__LAST as u16); + assert!(r < Self::__LAST_TOKEN as u16); unsafe { std::mem::transmute(r) } } pub fn into_raw(self) -> u16 { diff --git a/crates/jrsonnet-types/Cargo.toml b/crates/jrsonnet-types/Cargo.toml index eb4dd594..386b81c8 100644 --- a/crates/jrsonnet-types/Cargo.toml +++ b/crates/jrsonnet-types/Cargo.toml @@ -13,7 +13,5 @@ workspace = true [dependencies] jrsonnet-gcmodule.workspace = true -peg.workspace = true - [features] exp-bigint = [] diff --git a/crates/jrsonnet-types/src/lib.rs b/crates/jrsonnet-types/src/lib.rs index 08dc821b..203c27ac 100644 --- a/crates/jrsonnet-types/src/lib.rs +++ b/crates/jrsonnet-types/src/lib.rs @@ -216,68 +216,6 @@ impl Display for ComplexValType { } } -peg::parser! 
{ -pub grammar parser() for str { - rule number() -> f64 - = n:$(['0'..='9']+) { n.parse().unwrap() } - - rule any_ty() -> ComplexValType = "any" { ComplexValType::Any } - rule char_ty() -> ComplexValType = "character" { ComplexValType::Char } - rule bool_ty() -> ComplexValType = "boolean" { ComplexValType::Simple(ValType::Bool) } - rule null_ty() -> ComplexValType = "null" { ComplexValType::Simple(ValType::Null) } - rule str_ty() -> ComplexValType = "string" { ComplexValType::Simple(ValType::Str) } - rule num_ty() -> ComplexValType = "number" { ComplexValType::Simple(ValType::Num) } - rule simple_array_ty() -> ComplexValType = "array" { ComplexValType::Simple(ValType::Arr) } - rule simple_object_ty() -> ComplexValType = "object" { ComplexValType::Simple(ValType::Obj) } - rule simple_function_ty() -> ComplexValType = "function" { ComplexValType::Simple(ValType::Func) } - - rule array_ty() -> ComplexValType - = "Array<" t:ty() ">" { ComplexValType::Array(Box::new(t)) } - - rule bounded_number_ty() -> ComplexValType - = "BoundedNumber<" a:number() ", " b:number() ">" { ComplexValType::BoundedNumber(Some(a), Some(b)) } - - rule ty_basic() -> ComplexValType - = any_ty() - / char_ty() - / bool_ty() - / null_ty() - / str_ty() - / num_ty() - / simple_array_ty() - / simple_object_ty() - / simple_function_ty() - / array_ty() - / bounded_number_ty() - - pub rule ty() -> ComplexValType - = precedence! { - a:(@) " | " b:@ { - match a { - ComplexValType::Union(mut a) => { - a.push(b); - ComplexValType::Union(a) - } - _ => ComplexValType::Union(vec![a, b]), - } - } - -- - a:(@) " & " b:@ { - match a { - ComplexValType::Sum(mut a) => { - a.push(b); - ComplexValType::Sum(a) - } - _ => ComplexValType::Sum(vec![a, b]), - } - } - -- - "(" t:ty() ")" { t } - t:ty_basic() { t } - } -} -} - #[cfg(test)] pub mod tests { use super::parser; diff --git a/nix/jrsonnet.nix b/nix/jrsonnet.nix index 1090137a..4e7e81ef 100644 --- a/nix/jrsonnet.nix +++ b/nix/jrsonnet.nix @@ -11,7 +11,9 @@ with lib; src = lib.cleanSourceWith { src = ../.; filter = path: type: + # Tests use .jsonnet files. (lib.hasSuffix "\.jsonnet" path) + || (lib.hasSuffix "\.libsonnet" path) || (craneLib.filterCargoSources path type); }; pname = "jrsonnet"; @@ -19,6 +21,13 @@ with lib; cargoExtraArgs = "--locked --features=mimalloc${optionalString withNightlyFeatures ",nightly"}${optionalString withExperimentalFeatures ",experimental"}"; + env = lib.optionalAttrs withNightlyFeatures { + # Do not panic on pipe failure: https://github.com/rust-lang/rust/issues/97889 + # https://doc.rust-lang.org/nightly/unstable-book/compiler-flags/on-broken-pipe.html + # FIXME: Maybe inherit should be used here? + RUSTFLAGS = "-Zon-broken-pipe=kill"; + }; + nativeBuildInputs = [makeWrapper]; # To clean-up hyperfine output
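// A minimal sketch (not part of this diff) of driving the new nom-based entry
// point; `parse`, `ParserSettings`, `Source::new_virtual`, and `IStr` are the
// crate exports exercised by the tests above. Note that in this revision
// `parse` panics on invalid input instead of returning `Err`.
fn parse_example() {
    use jrsonnet_parser::{parse, ParserSettings, Source};
    let code = "2 + 2 * 2";
    let source = Source::new_virtual("example".into(), code.into());
    let ast = parse(code, &ParserSettings { source }).expect("valid jsonnet");
    // Every sub-expression carries a Span(Source, u32, u32) byte range,
    // as checked by add_location_info_to_all_sub_expressions above.
    println!("{ast:?}");
}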