From ef0619f0caf8b0b78ee28b219ad0ce9e1f5547c0 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Wed, 15 May 2019 13:01:50 +0300 Subject: [PATCH 1/7] Switch from `ordermap` to `indexmap`. --- Cargo.toml | 2 +- src/lib.rs | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 94d4fd1..66e0cbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,4 +13,4 @@ readme = "README.md" description = "Grammar framework." [dependencies] -ordermap = "0.3.0" +indexmap = "1" diff --git a/src/lib.rs b/src/lib.rs index b34f202..a4ace70 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ #![deny(rust_2018_idioms)] -use ordermap::{orderset, OrderMap, OrderSet}; +use indexmap::{indexset, IndexMap, IndexSet}; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::hash::Hash; @@ -9,13 +9,13 @@ use std::ops::{Add, BitAnd, BitOr}; use std::rc::Rc; pub struct Grammar { - pub rules: OrderMap>, + pub rules: IndexMap>, } impl Grammar { pub fn new() -> Self { Grammar { - rules: OrderMap::new(), + rules: IndexMap::new(), } } pub fn define(&mut self, name: &str, rule: RuleWithNamedFields) { @@ -110,25 +110,25 @@ impl Grammar { #[derive(Clone)] pub struct RuleWithNamedFields { pub rule: Rc>, - pub fields: OrderMap>>, + pub fields: IndexMap>>, } pub fn empty() -> RuleWithNamedFields { RuleWithNamedFields { rule: Rc::new(Rule::Empty), - fields: OrderMap::new(), + fields: IndexMap::new(), } } pub fn eat(pat: impl Into) -> RuleWithNamedFields { RuleWithNamedFields { rule: Rc::new(Rule::Eat(pat.into())), - fields: OrderMap::new(), + fields: IndexMap::new(), } } pub fn call(name: &str) -> RuleWithNamedFields { RuleWithNamedFields { rule: Rc::new(Rule::Call(name.to_string())), - fields: OrderMap::new(), + fields: IndexMap::new(), } } @@ -142,7 +142,7 @@ impl RuleWithNamedFields { Rule::Opt(_) => vec![0], _ => vec![], }; - self.fields.insert(name.to_string(), orderset![path]); + self.fields.insert(name.to_string(), indexset![path]); self } pub fn opt(mut self) -> Self { @@ -269,7 +269,7 @@ impl BitOr for RuleWithNamedFields { fn bitor(self, other: Self) -> Self { let (old_rules, this, mut fields) = match &*self.rule { Rule::Or(rules) => (&rules[..], None, self.fields), - _ => (&[][..], Some(self), OrderMap::new()), + _ => (&[][..], Some(self), IndexMap::new()), }; let new_rules = @@ -278,7 +278,7 @@ impl BitOr for RuleWithNamedFields { .enumerate() .map(|(i, rule)| { for (name, paths) in rule.fields { - fields.entry(name).or_insert_with(OrderSet::new).extend( + fields.entry(name).or_insert_with(IndexSet::new).extend( paths.into_iter().map(|mut path| { path.insert(0, old_rules.len() + i); path @@ -318,7 +318,7 @@ pub enum Rule { } impl Rule { - pub fn field_pathset_is_refutable(&self, paths: &OrderSet>) -> bool { + pub fn field_pathset_is_refutable(&self, paths: &IndexSet>) -> bool { if paths.len() > 1 { true } else { @@ -541,9 +541,9 @@ impl RuleWithNamedFields { fn filter_fields<'a>( &'a self, field: Option, - ) -> impl Iterator>)> + 'a { + ) -> impl Iterator>)> + 'a { self.fields.iter().filter_map(move |(name, paths)| { - let paths: OrderSet<_> = paths + let paths: IndexSet<_> = paths .iter() .filter_map(move |path| { if path.first().cloned() == field { From d742a7337dac65b0ab142b3757bf4dc57c4cf94d Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Thu, 16 May 2019 09:22:05 +0300 Subject: [PATCH 2/7] Move `Rule` and `RuleWithNamedFields` into a `rule` module. --- src/lib.rs | 561 +--------------------------------------------------- src/rule.rs | 554 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 564 insertions(+), 551 deletions(-) create mode 100644 src/rule.rs diff --git a/src/lib.rs b/src/lib.rs index a4ace70..248f9bb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,13 @@ #![deny(rust_2018_idioms)] -use indexmap::{indexset, IndexMap, IndexSet}; -use std::collections::hash_map::Entry; +use indexmap::IndexMap; use std::collections::HashMap; use std::hash::Hash; -use std::iter; -use std::ops::{Add, BitAnd, BitOr}; -use std::rc::Rc; + +pub mod rule; pub struct Grammar { - pub rules: IndexMap>, + pub rules: IndexMap>, } impl Grammar { @@ -18,472 +16,27 @@ impl Grammar { rules: IndexMap::new(), } } - pub fn define(&mut self, name: &str, rule: RuleWithNamedFields) { + pub fn define(&mut self, name: &str, rule: rule::RuleWithNamedFields) { self.rules.insert(name.to_string(), rule); } pub fn extend(&mut self, other: Self) { self.rules.extend(other.rules); } - pub fn insert_whitespace(self, whitespace: RuleWithNamedFields) -> Self + pub fn insert_whitespace(self, whitespace: rule::RuleWithNamedFields) -> Self where Pat: Clone, { - assert!(whitespace.fields.is_empty()); - - struct WhitespaceInserter { - whitespace: RuleWithNamedFields, - } - - impl Folder for WhitespaceInserter { - // FIXME(eddyb) this will insert too many whitespace rules, - // e.g. `A B? C` becomes `A WS B? WS C`, which when `B` is - // missing, is `A WS WS C`. Even worse, `A? B` ends up as - // `A? WS B`, which has an incorrect leading whitespace. - fn fold_concat( - &mut self, - left: RuleWithNamedFields, - right: RuleWithNamedFields, - ) -> RuleWithNamedFields { - left.fold(self) + self.whitespace.clone() + right.fold(self) - } - fn fold_repeat_many( - &mut self, - elem: RuleWithNamedFields, - sep: Option<(RuleWithNamedFields, SepKind)>, - ) -> RuleWithNamedFields { - match sep { - // A* => A* % WS - None => elem - .fold(self) - .repeat_more(Some((self.whitespace.clone(), SepKind::Simple))), - // A* % B => A* % (WS B WS) - Some((sep, SepKind::Simple)) => elem.fold(self).repeat_more(Some(( - self.whitespace.clone() + sep + self.whitespace.clone(), - SepKind::Simple, - ))), - // FIXME(cad97) this will insert too many whitespace rules - // A* %% B => ??? - // Currently, A* %% (WS B WS), which allows trailing whitespace incorrectly - Some((sep, SepKind::Trailing)) => elem.fold(self).repeat_more(Some(( - self.whitespace.clone() + sep.clone() + self.whitespace.clone(), - SepKind::Trailing, - ))), - } - } - fn fold_repeat_more( - &mut self, - elem: RuleWithNamedFields, - sep: Option<(RuleWithNamedFields, SepKind)>, - ) -> RuleWithNamedFields { - match sep { - // A+ => A+ % WS - None => elem - .fold(self) - .repeat_more(Some((self.whitespace.clone(), SepKind::Simple))), - // A+ % B => A+ % (WS B WS) - Some((sep, SepKind::Simple)) => elem.fold(self).repeat_more(Some(( - self.whitespace.clone() + sep + self.whitespace.clone(), - SepKind::Simple, - ))), - // A+ %% B => A+ % (WS B WS) (WS B)? - Some((sep, SepKind::Trailing)) => { - elem.fold(self).repeat_more(Some(( - self.whitespace.clone() + sep.clone() + self.whitespace.clone(), - SepKind::Simple, - ))) + (self.whitespace.clone() + sep).opt() - } - } - } - } - - let mut inserter = WhitespaceInserter { whitespace }; Grammar { rules: self .rules .into_iter() - .map(|(name, rule)| (name, rule.fold(&mut inserter))) + .map(|(name, rule)| (name, rule.insert_whitespace(whitespace.clone()))) .collect(), } } } -#[derive(Clone)] -pub struct RuleWithNamedFields { - pub rule: Rc>, - pub fields: IndexMap>>, -} - -pub fn empty() -> RuleWithNamedFields { - RuleWithNamedFields { - rule: Rc::new(Rule::Empty), - fields: IndexMap::new(), - } -} -pub fn eat(pat: impl Into) -> RuleWithNamedFields { - RuleWithNamedFields { - rule: Rc::new(Rule::Eat(pat.into())), - fields: IndexMap::new(), - } -} -pub fn call(name: &str) -> RuleWithNamedFields { - RuleWithNamedFields { - rule: Rc::new(Rule::Call(name.to_string())), - fields: IndexMap::new(), - } -} - -impl RuleWithNamedFields { - pub fn field(mut self, name: &str) -> Self { - let path = match &*self.rule { - Rule::RepeatMany(rule, _) | Rule::RepeatMore(rule, _) => match **rule { - Rule::Eat(_) | Rule::Call(_) => vec![], - _ => unimplemented!(), - }, - Rule::Opt(_) => vec![0], - _ => vec![], - }; - self.fields.insert(name.to_string(), indexset![path]); - self - } - pub fn opt(mut self) -> Self { - self.fields = self - .fields - .into_iter() - .map(|(name, paths)| { - ( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 0); - path - }) - .collect(), - ) - }) - .collect(); - self.rule = Rc::new(Rule::Opt(self.rule)); - self - } - pub fn repeat_many(mut self, sep: Option<(Self, SepKind)>) -> Self { - self.fields = self - .fields - .into_iter() - .map(|(name, paths)| { - ( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 0); - path - }) - .collect(), - ) - }) - .collect(); - if let Some((sep, _)) = &sep { - assert!(sep.fields.is_empty()); - } - self.rule = Rc::new(Rule::RepeatMany( - self.rule, - sep.map(|(sep, kind)| (sep.rule, kind)), - )); - self - } - pub fn repeat_more(mut self, sep: Option<(Self, SepKind)>) -> Self { - self.fields = self - .fields - .into_iter() - .map(|(name, paths)| { - ( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 0); - path - }) - .collect(), - ) - }) - .collect(); - if let Some((sep, _)) = &sep { - assert!(sep.fields.is_empty()); - } - self.rule = Rc::new(Rule::RepeatMore( - self.rule, - sep.map(|(sep, kind)| (sep.rule, kind)), - )); - self - } -} - -impl Add for RuleWithNamedFields { - type Output = Self; - - fn add(mut self, other: Self) -> Self { - match (&*self.rule, &*other.rule) { - (Rule::Empty, _) if self.fields.is_empty() => return other, - (_, Rule::Empty) if other.fields.is_empty() => return self, - _ => {} - } - - self.fields = self - .fields - .into_iter() - .map(|(name, paths)| { - ( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 0); - path - }) - .collect(), - ) - }) - .collect(); - for (name, paths) in other.fields { - assert!(!self.fields.contains_key(&name), "duplicate field {}", name); - self.fields.insert( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 1); - path - }) - .collect(), - ); - } - self.rule = Rc::new(Rule::Concat([self.rule, other.rule])); - self - } -} - -impl BitOr for RuleWithNamedFields { - type Output = Self; - - fn bitor(self, other: Self) -> Self { - let (old_rules, this, mut fields) = match &*self.rule { - Rule::Or(rules) => (&rules[..], None, self.fields), - _ => (&[][..], Some(self), IndexMap::new()), - }; - - let new_rules = - this.into_iter() - .chain(iter::once(other)) - .enumerate() - .map(|(i, rule)| { - for (name, paths) in rule.fields { - fields.entry(name).or_insert_with(IndexSet::new).extend( - paths.into_iter().map(|mut path| { - path.insert(0, old_rules.len() + i); - path - }), - ); - } - - rule.rule - }); - let rules = old_rules.iter().cloned().chain(new_rules).collect(); - - RuleWithNamedFields { - rule: Rc::new(Rule::Or(rules)), - fields, - } - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum SepKind { - Simple, - Trailing, -} - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Rule { - Empty, - Eat(Pat), - Call(String), - - Concat([Rc>; 2]), - Or(Vec>>), - - Opt(Rc>), - RepeatMany(Rc>, Option<(Rc>, SepKind)>), - RepeatMore(Rc>, Option<(Rc>, SepKind)>), -} - -impl Rule { - pub fn field_pathset_is_refutable(&self, paths: &IndexSet>) -> bool { - if paths.len() > 1 { - true - } else { - self.field_is_refutable(paths.get_index(0).unwrap()) - } - } - pub fn field_is_refutable(&self, path: &[usize]) -> bool { - match self { - Rule::Empty - | Rule::Eat(_) - | Rule::Call(_) - | Rule::RepeatMany(..) - | Rule::RepeatMore(..) => false, - Rule::Concat(rules) => rules[path[0]].field_is_refutable(&path[1..]), - Rule::Or(..) | Rule::Opt(_) => true, - } - } -} - -// FIXME(eddyb) this should just work with `self: &Rc` on inherent methods, -// but that still requires `#![feature(arbitrary_self_types)]`. -trait RcRuleMethods: Sized { - fn can_be_empty( - &self, - cache: &mut HashMap>, - grammar: &Grammar, - ) -> MaybeKnown; -} - -impl RcRuleMethods for Rc> { - fn can_be_empty( - &self, - cache: &mut HashMap>, - grammar: &Grammar, - ) -> MaybeKnown { - match cache.entry(self.clone()) { - Entry::Occupied(entry) => return *entry.get(), - Entry::Vacant(entry) => { - entry.insert(MaybeKnown::Unknown); - } - }; - let r = self.can_be_empty_uncached(cache, grammar); - match r { - MaybeKnown::Known(_) => *cache.get_mut(self).unwrap() = r, - MaybeKnown::Unknown => { - cache.remove(self); - } - } - r - } -} - -impl Rule { - fn can_be_empty_uncached( - &self, - cache: &mut HashMap, MaybeKnown>, - grammar: &Grammar, - ) -> MaybeKnown { - match self { - Rule::Empty | Rule::Opt(_) | Rule::RepeatMany(..) => MaybeKnown::Known(true), - Rule::Eat(pat) => pat.matches_empty(), - Rule::Call(rule) => grammar.rules[rule].rule.can_be_empty(cache, grammar), - Rule::Concat([left, right]) => { - left.can_be_empty(cache, grammar) & right.can_be_empty(cache, grammar) - } - Rule::Or(rules) => rules.iter().fold(MaybeKnown::Known(false), |prev, rule| { - prev | rule.can_be_empty(cache, grammar) - }), - Rule::RepeatMore(elem, _) => elem.can_be_empty(cache, grammar), - } - } - - fn check_non_empty_opt( - &self, - cache: &mut HashMap, MaybeKnown>, - grammar: &Grammar, - ) { - match self { - Rule::Empty | Rule::Eat(_) | Rule::Call(_) => {} - Rule::Concat([left, right]) => { - left.check_non_empty_opt(cache, grammar); - right.check_non_empty_opt(cache, grammar); - } - Rule::Or(rules) => { - for rule in rules { - rule.check_non_empty_opt(cache, grammar); - } - } - Rule::Opt(rule) => { - assert_eq!(rule.can_be_empty(cache, grammar), MaybeKnown::Known(false)); - rule.check_non_empty_opt(cache, grammar) - } - Rule::RepeatMany(elem, sep) | Rule::RepeatMore(elem, sep) => { - assert_eq!(elem.can_be_empty(cache, grammar), MaybeKnown::Known(false)); - elem.check_non_empty_opt(cache, grammar); - if let Some((sep, _)) = sep { - sep.check_non_empty_opt(cache, grammar); - } - } - } - } - - fn check_call_names(&self, grammar: &Grammar) { - match self { - Rule::Empty | Rule::Eat(_) => {} - Rule::Call(rule) => { - assert!(grammar.rules.contains_key(rule), "no rule named `{}`", rule); - } - Rule::Concat([left, right]) => { - left.check_call_names(grammar); - right.check_call_names(grammar); - } - Rule::Or(rules) => { - for rule in rules { - rule.check_call_names(grammar); - } - } - Rule::Opt(rule) => rule.check_call_names(grammar), - Rule::RepeatMany(elem, sep) | Rule::RepeatMore(elem, sep) => { - elem.check_call_names(grammar); - if let Some((sep, _)) = sep { - sep.check_call_names(grammar); - } - } - } - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum MaybeKnown { - Known(T), - Unknown, -} - -impl BitOr for MaybeKnown { - type Output = Self; - - fn bitor(self, rhs: Self) -> Self { - match (self, rhs) { - (MaybeKnown::Known(true), _) | (_, MaybeKnown::Known(true)) => MaybeKnown::Known(true), - (MaybeKnown::Known(false), x) | (x, MaybeKnown::Known(false)) => x, - (MaybeKnown::Unknown, MaybeKnown::Unknown) => MaybeKnown::Unknown, - } - } -} - -impl BitAnd for MaybeKnown { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self { - match (self, rhs) { - (MaybeKnown::Known(false), _) | (_, MaybeKnown::Known(false)) => { - MaybeKnown::Known(false) - } - (MaybeKnown::Known(true), x) | (x, MaybeKnown::Known(true)) => x, - (MaybeKnown::Unknown, MaybeKnown::Unknown) => MaybeKnown::Unknown, - } - } -} - -pub trait MatchesEmpty { - fn matches_empty(&self) -> MaybeKnown; -} - -impl Grammar { +impl Grammar { pub fn check(&self) { for rule in self.rules.values() { rule.rule.check_call_names(self); @@ -496,107 +49,13 @@ impl Grammar { } } -pub trait Folder: Sized { - fn fold_leaf(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { - rule - } - fn fold_concat( - &mut self, - left: RuleWithNamedFields, - right: RuleWithNamedFields, - ) -> RuleWithNamedFields { - left.fold(self) + right.fold(self) - } - fn fold_or( - &mut self, - rules: impl Iterator>, - ) -> RuleWithNamedFields { - let mut rules = rules.map(|rule| rule.fold(self)); - let first = rules.next().unwrap(); - rules.fold(first, |or, rule| or | rule) - } - fn fold_opt(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { - rule.fold(self).opt() - } - fn fold_repeat_many( - &mut self, - elem: RuleWithNamedFields, - sep: Option<(RuleWithNamedFields, SepKind)>, - ) -> RuleWithNamedFields { - elem.fold(self) - .repeat_many(sep.map(|(sep, kind)| (sep.fold(self), kind))) - } - fn fold_repeat_more( - &mut self, - elem: RuleWithNamedFields, - sep: Option<(RuleWithNamedFields, SepKind)>, - ) -> RuleWithNamedFields { - elem.fold(self) - .repeat_more(sep.map(|(sep, kind)| (sep.fold(self), kind))) - } -} - -impl RuleWithNamedFields { - // HACK(eddyb) this is pretty expensive, find a better way - fn filter_fields<'a>( - &'a self, - field: Option, - ) -> impl Iterator>)> + 'a { - self.fields.iter().filter_map(move |(name, paths)| { - let paths: IndexSet<_> = paths - .iter() - .filter_map(move |path| { - if path.first().cloned() == field { - Some(path.get(1..).unwrap_or(&[]).to_vec()) - } else { - None - } - }) - .collect(); - if !paths.is_empty() { - Some((name.clone(), paths)) - } else { - None - } - }) - } - - pub fn fold(self, folder: &mut impl Folder) -> Self { - let field_rule = |rule: &Rc>, i| RuleWithNamedFields { - rule: rule.clone(), - fields: self.filter_fields(Some(i)).collect(), - }; - let mut rule = match &*self.rule { - Rule::Empty | Rule::Eat(_) | Rule::Call(_) => return folder.fold_leaf(self), - Rule::Concat([left, right]) => { - folder.fold_concat(field_rule(left, 0), field_rule(right, 1)) - } - Rule::Or(rules) => folder.fold_or( - rules - .iter() - .enumerate() - .map(|(i, rule)| field_rule(rule, i)), - ), - Rule::Opt(rule) => folder.fold_opt(field_rule(rule, 0)), - Rule::RepeatMany(elem, sep) => folder.fold_repeat_many( - field_rule(elem, 0), - sep.as_ref().map(|(sep, kind)| (field_rule(sep, 1), *kind)), - ), - Rule::RepeatMore(elem, sep) => folder.fold_repeat_more( - field_rule(elem, 0), - sep.as_ref().map(|(sep, kind)| (field_rule(sep, 1), *kind)), - ), - }; - rule.fields.extend(self.filter_fields(None)); - rule - } -} - /// Construct a (meta-)grammar for parsing a grammar. pub fn grammar_grammar() -> Grammar where Pat: Clone + From<&'static str>, { + use crate::rule::*; + // HACK(eddyb) more explicit subset of the grammar, for bootstrapping. macro_rules! rule { ({ $start:tt ..= $end:tt }) => { diff --git a/src/rule.rs b/src/rule.rs new file mode 100644 index 0000000..2eca398 --- /dev/null +++ b/src/rule.rs @@ -0,0 +1,554 @@ +use indexmap::{indexset, IndexMap, IndexSet}; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::hash::Hash; +use std::iter; +use std::ops::{Add, BitAnd, BitOr}; +use std::rc::Rc; + +#[derive(Clone)] +pub struct RuleWithNamedFields { + pub rule: Rc>, + pub fields: IndexMap>>, +} + +pub fn empty() -> RuleWithNamedFields { + RuleWithNamedFields { + rule: Rc::new(Rule::Empty), + fields: IndexMap::new(), + } +} +pub fn eat(pat: impl Into) -> RuleWithNamedFields { + RuleWithNamedFields { + rule: Rc::new(Rule::Eat(pat.into())), + fields: IndexMap::new(), + } +} +pub fn call(name: &str) -> RuleWithNamedFields { + RuleWithNamedFields { + rule: Rc::new(Rule::Call(name.to_string())), + fields: IndexMap::new(), + } +} + +impl RuleWithNamedFields { + pub fn field(mut self, name: &str) -> Self { + let path = match &*self.rule { + Rule::RepeatMany(rule, _) | Rule::RepeatMore(rule, _) => match **rule { + Rule::Eat(_) | Rule::Call(_) => vec![], + _ => unimplemented!(), + }, + Rule::Opt(_) => vec![0], + _ => vec![], + }; + self.fields.insert(name.to_string(), indexset![path]); + self + } + pub fn opt(mut self) -> Self { + self.fields = self + .fields + .into_iter() + .map(|(name, paths)| { + ( + name, + paths + .into_iter() + .map(|mut path| { + path.insert(0, 0); + path + }) + .collect(), + ) + }) + .collect(); + self.rule = Rc::new(Rule::Opt(self.rule)); + self + } + pub fn repeat_many(mut self, sep: Option<(Self, SepKind)>) -> Self { + self.fields = self + .fields + .into_iter() + .map(|(name, paths)| { + ( + name, + paths + .into_iter() + .map(|mut path| { + path.insert(0, 0); + path + }) + .collect(), + ) + }) + .collect(); + if let Some((sep, _)) = &sep { + assert!(sep.fields.is_empty()); + } + self.rule = Rc::new(Rule::RepeatMany( + self.rule, + sep.map(|(sep, kind)| (sep.rule, kind)), + )); + self + } + pub fn repeat_more(mut self, sep: Option<(Self, SepKind)>) -> Self { + self.fields = self + .fields + .into_iter() + .map(|(name, paths)| { + ( + name, + paths + .into_iter() + .map(|mut path| { + path.insert(0, 0); + path + }) + .collect(), + ) + }) + .collect(); + if let Some((sep, _)) = &sep { + assert!(sep.fields.is_empty()); + } + self.rule = Rc::new(Rule::RepeatMore( + self.rule, + sep.map(|(sep, kind)| (sep.rule, kind)), + )); + self + } +} + +impl Add for RuleWithNamedFields { + type Output = Self; + + fn add(mut self, other: Self) -> Self { + match (&*self.rule, &*other.rule) { + (Rule::Empty, _) if self.fields.is_empty() => return other, + (_, Rule::Empty) if other.fields.is_empty() => return self, + _ => {} + } + + self.fields = self + .fields + .into_iter() + .map(|(name, paths)| { + ( + name, + paths + .into_iter() + .map(|mut path| { + path.insert(0, 0); + path + }) + .collect(), + ) + }) + .collect(); + for (name, paths) in other.fields { + assert!(!self.fields.contains_key(&name), "duplicate field {}", name); + self.fields.insert( + name, + paths + .into_iter() + .map(|mut path| { + path.insert(0, 1); + path + }) + .collect(), + ); + } + self.rule = Rc::new(Rule::Concat([self.rule, other.rule])); + self + } +} + +impl BitOr for RuleWithNamedFields { + type Output = Self; + + fn bitor(self, other: Self) -> Self { + let (old_rules, this, mut fields) = match &*self.rule { + Rule::Or(rules) => (&rules[..], None, self.fields), + _ => (&[][..], Some(self), IndexMap::new()), + }; + + let new_rules = + this.into_iter() + .chain(iter::once(other)) + .enumerate() + .map(|(i, rule)| { + for (name, paths) in rule.fields { + fields.entry(name).or_insert_with(IndexSet::new).extend( + paths.into_iter().map(|mut path| { + path.insert(0, old_rules.len() + i); + path + }), + ); + } + + rule.rule + }); + let rules = old_rules.iter().cloned().chain(new_rules).collect(); + + RuleWithNamedFields { + rule: Rc::new(Rule::Or(rules)), + fields, + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum SepKind { + Simple, + Trailing, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Rule { + Empty, + Eat(Pat), + Call(String), + + Concat([Rc>; 2]), + Or(Vec>>), + + Opt(Rc>), + RepeatMany(Rc>, Option<(Rc>, SepKind)>), + RepeatMore(Rc>, Option<(Rc>, SepKind)>), +} + +impl Rule { + pub fn field_pathset_is_refutable(&self, paths: &IndexSet>) -> bool { + if paths.len() > 1 { + true + } else { + self.field_is_refutable(paths.get_index(0).unwrap()) + } + } + pub fn field_is_refutable(&self, path: &[usize]) -> bool { + match self { + Rule::Empty + | Rule::Eat(_) + | Rule::Call(_) + | Rule::RepeatMany(..) + | Rule::RepeatMore(..) => false, + Rule::Concat(rules) => rules[path[0]].field_is_refutable(&path[1..]), + Rule::Or(..) | Rule::Opt(_) => true, + } + } +} + +// FIXME(eddyb) this should just work with `self: &Rc` on inherent methods, +// but that still requires `#![feature(arbitrary_self_types)]`. +trait RcRuleMethods: Sized { + fn can_be_empty( + &self, + cache: &mut HashMap>, + grammar: &crate::Grammar, + ) -> MaybeKnown; +} + +impl RcRuleMethods for Rc> { + fn can_be_empty( + &self, + cache: &mut HashMap>, + grammar: &crate::Grammar, + ) -> MaybeKnown { + match cache.entry(self.clone()) { + Entry::Occupied(entry) => return *entry.get(), + Entry::Vacant(entry) => { + entry.insert(MaybeKnown::Unknown); + } + }; + let r = self.can_be_empty_uncached(cache, grammar); + match r { + MaybeKnown::Known(_) => *cache.get_mut(self).unwrap() = r, + MaybeKnown::Unknown => { + cache.remove(self); + } + } + r + } +} + +impl Rule { + fn can_be_empty_uncached( + &self, + cache: &mut HashMap, MaybeKnown>, + grammar: &crate::Grammar, + ) -> MaybeKnown { + match self { + Rule::Empty | Rule::Opt(_) | Rule::RepeatMany(..) => MaybeKnown::Known(true), + Rule::Eat(pat) => pat.matches_empty(), + Rule::Call(rule) => grammar.rules[rule].rule.can_be_empty(cache, grammar), + Rule::Concat([left, right]) => { + left.can_be_empty(cache, grammar) & right.can_be_empty(cache, grammar) + } + Rule::Or(rules) => rules.iter().fold(MaybeKnown::Known(false), |prev, rule| { + prev | rule.can_be_empty(cache, grammar) + }), + Rule::RepeatMore(elem, _) => elem.can_be_empty(cache, grammar), + } + } + + pub(crate) fn check_non_empty_opt( + &self, + cache: &mut HashMap, MaybeKnown>, + grammar: &crate::Grammar, + ) { + match self { + Rule::Empty | Rule::Eat(_) | Rule::Call(_) => {} + Rule::Concat([left, right]) => { + left.check_non_empty_opt(cache, grammar); + right.check_non_empty_opt(cache, grammar); + } + Rule::Or(rules) => { + for rule in rules { + rule.check_non_empty_opt(cache, grammar); + } + } + Rule::Opt(rule) => { + assert_eq!(rule.can_be_empty(cache, grammar), MaybeKnown::Known(false)); + rule.check_non_empty_opt(cache, grammar) + } + Rule::RepeatMany(elem, sep) | Rule::RepeatMore(elem, sep) => { + assert_eq!(elem.can_be_empty(cache, grammar), MaybeKnown::Known(false)); + elem.check_non_empty_opt(cache, grammar); + if let Some((sep, _)) = sep { + sep.check_non_empty_opt(cache, grammar); + } + } + } + } + + pub(crate) fn check_call_names(&self, grammar: &crate::Grammar) { + match self { + Rule::Empty | Rule::Eat(_) => {} + Rule::Call(rule) => { + assert!(grammar.rules.contains_key(rule), "no rule named `{}`", rule); + } + Rule::Concat([left, right]) => { + left.check_call_names(grammar); + right.check_call_names(grammar); + } + Rule::Or(rules) => { + for rule in rules { + rule.check_call_names(grammar); + } + } + Rule::Opt(rule) => rule.check_call_names(grammar), + Rule::RepeatMany(elem, sep) | Rule::RepeatMore(elem, sep) => { + elem.check_call_names(grammar); + if let Some((sep, _)) = sep { + sep.check_call_names(grammar); + } + } + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum MaybeKnown { + Known(T), + Unknown, +} + +impl BitOr for MaybeKnown { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self { + match (self, rhs) { + (MaybeKnown::Known(true), _) | (_, MaybeKnown::Known(true)) => MaybeKnown::Known(true), + (MaybeKnown::Known(false), x) | (x, MaybeKnown::Known(false)) => x, + (MaybeKnown::Unknown, MaybeKnown::Unknown) => MaybeKnown::Unknown, + } + } +} + +impl BitAnd for MaybeKnown { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self { + match (self, rhs) { + (MaybeKnown::Known(false), _) | (_, MaybeKnown::Known(false)) => { + MaybeKnown::Known(false) + } + (MaybeKnown::Known(true), x) | (x, MaybeKnown::Known(true)) => x, + (MaybeKnown::Unknown, MaybeKnown::Unknown) => MaybeKnown::Unknown, + } + } +} + +pub trait MatchesEmpty { + fn matches_empty(&self) -> MaybeKnown; +} + +pub trait Folder: Sized { + fn fold_leaf(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { + rule + } + fn fold_concat( + &mut self, + left: RuleWithNamedFields, + right: RuleWithNamedFields, + ) -> RuleWithNamedFields { + left.fold(self) + right.fold(self) + } + fn fold_or( + &mut self, + rules: impl Iterator>, + ) -> RuleWithNamedFields { + let mut rules = rules.map(|rule| rule.fold(self)); + let first = rules.next().unwrap(); + rules.fold(first, |or, rule| or | rule) + } + fn fold_opt(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { + rule.fold(self).opt() + } + fn fold_repeat_many( + &mut self, + elem: RuleWithNamedFields, + sep: Option<(RuleWithNamedFields, SepKind)>, + ) -> RuleWithNamedFields { + elem.fold(self) + .repeat_many(sep.map(|(sep, kind)| (sep.fold(self), kind))) + } + fn fold_repeat_more( + &mut self, + elem: RuleWithNamedFields, + sep: Option<(RuleWithNamedFields, SepKind)>, + ) -> RuleWithNamedFields { + elem.fold(self) + .repeat_more(sep.map(|(sep, kind)| (sep.fold(self), kind))) + } +} + +impl RuleWithNamedFields { + // HACK(eddyb) this is pretty expensive, find a better way + fn filter_fields<'a>( + &'a self, + field: Option, + ) -> impl Iterator>)> + 'a { + self.fields.iter().filter_map(move |(name, paths)| { + let paths: IndexSet<_> = paths + .iter() + .filter_map(move |path| { + if path.first().cloned() == field { + Some(path.get(1..).unwrap_or(&[]).to_vec()) + } else { + None + } + }) + .collect(); + if !paths.is_empty() { + Some((name.clone(), paths)) + } else { + None + } + }) + } + + pub fn fold(self, folder: &mut impl Folder) -> Self { + let field_rule = |rule: &Rc>, i| RuleWithNamedFields { + rule: rule.clone(), + fields: self.filter_fields(Some(i)).collect(), + }; + let mut rule = match &*self.rule { + Rule::Empty | Rule::Eat(_) | Rule::Call(_) => return folder.fold_leaf(self), + Rule::Concat([left, right]) => { + folder.fold_concat(field_rule(left, 0), field_rule(right, 1)) + } + Rule::Or(rules) => folder.fold_or( + rules + .iter() + .enumerate() + .map(|(i, rule)| field_rule(rule, i)), + ), + Rule::Opt(rule) => folder.fold_opt(field_rule(rule, 0)), + Rule::RepeatMany(elem, sep) => folder.fold_repeat_many( + field_rule(elem, 0), + sep.as_ref().map(|(sep, kind)| (field_rule(sep, 1), *kind)), + ), + Rule::RepeatMore(elem, sep) => folder.fold_repeat_more( + field_rule(elem, 0), + sep.as_ref().map(|(sep, kind)| (field_rule(sep, 1), *kind)), + ), + }; + rule.fields.extend(self.filter_fields(None)); + rule + } + + pub fn insert_whitespace(self, whitespace: RuleWithNamedFields) -> Self + where + Pat: Clone, + { + assert!(whitespace.fields.is_empty()); + + struct WhitespaceInserter { + whitespace: RuleWithNamedFields, + } + + impl Folder for WhitespaceInserter { + // FIXME(eddyb) this will insert too many whitespace rules, + // e.g. `A B? C` becomes `A WS B? WS C`, which when `B` is + // missing, is `A WS WS C`. Even worse, `A? B` ends up as + // `A? WS B`, which has an incorrect leading whitespace. + fn fold_concat( + &mut self, + left: RuleWithNamedFields, + right: RuleWithNamedFields, + ) -> RuleWithNamedFields { + left.fold(self) + self.whitespace.clone() + right.fold(self) + } + fn fold_repeat_many( + &mut self, + elem: RuleWithNamedFields, + sep: Option<(RuleWithNamedFields, SepKind)>, + ) -> RuleWithNamedFields { + match sep { + // A* => A* % WS + None => elem + .fold(self) + .repeat_more(Some((self.whitespace.clone(), SepKind::Simple))), + // A* % B => A* % (WS B WS) + Some((sep, SepKind::Simple)) => elem.fold(self).repeat_more(Some(( + self.whitespace.clone() + sep + self.whitespace.clone(), + SepKind::Simple, + ))), + // FIXME(cad97) this will insert too many whitespace rules + // A* %% B => ??? + // Currently, A* %% (WS B WS), which allows trailing whitespace incorrectly + Some((sep, SepKind::Trailing)) => elem.fold(self).repeat_more(Some(( + self.whitespace.clone() + sep.clone() + self.whitespace.clone(), + SepKind::Trailing, + ))), + } + } + fn fold_repeat_more( + &mut self, + elem: RuleWithNamedFields, + sep: Option<(RuleWithNamedFields, SepKind)>, + ) -> RuleWithNamedFields { + match sep { + // A+ => A+ % WS + None => elem + .fold(self) + .repeat_more(Some((self.whitespace.clone(), SepKind::Simple))), + // A+ % B => A+ % (WS B WS) + Some((sep, SepKind::Simple)) => elem.fold(self).repeat_more(Some(( + self.whitespace.clone() + sep + self.whitespace.clone(), + SepKind::Simple, + ))), + // A+ %% B => A+ % (WS B WS) (WS B)? + Some((sep, SepKind::Trailing)) => { + elem.fold(self).repeat_more(Some(( + self.whitespace.clone() + sep.clone() + self.whitespace.clone(), + SepKind::Simple, + ))) + (self.whitespace.clone() + sep).opt() + } + } + } + } + + self.fold(&mut WhitespaceInserter { whitespace }) + } +} From 2cfe64056231288718562736d35e7d4f93e03ad9 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Thu, 16 May 2019 11:44:53 +0300 Subject: [PATCH 3/7] Introduce a context object and interning (for strings). --- src/context.rs | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 78 +++++++++++---------------------------------- src/rule.rs | 42 +++++++++++++----------- 3 files changed, 129 insertions(+), 77 deletions(-) create mode 100644 src/context.rs diff --git a/src/context.rs b/src/context.rs new file mode 100644 index 0000000..111a5e5 --- /dev/null +++ b/src/context.rs @@ -0,0 +1,86 @@ +use indexmap::IndexSet; +use std::convert::TryInto; +use std::marker::PhantomData; + +/// Context object with global resources for working with grammar, +/// such as interners. +pub struct Context { + interners: Interners, +} + +/// Dispatch helper, to allow implementing interning logic on +/// the type passed to `cx.intern(...)`. +pub trait InternInCx { + type Interned; + + fn intern_in_cx(self, cx: &mut Context) -> Self::Interned; +} + +impl Context { + pub fn new() -> Self { + Context { + interners: Interners::default(), + } + } + + pub fn intern>(&mut self, x: T) -> T::Interned { + x.intern_in_cx(self) + } +} + +macro_rules! interners { + ($($name:ident => $ty:ty),* $(,)?) => { + #[allow(non_snake_case)] + struct Interners { + $($name: IndexSet<$ty>,)* + _marker: PhantomData, + } + + impl Default for Interners { + fn default() -> Self { + Interners { + $($name: IndexSet::new(),)* + _marker: PhantomData, + } + } + } + + $( + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $name(u32); + + impl InternInCx for $ty { + type Interned = $name; + + fn intern_in_cx(self, cx: &mut Context) -> Self::Interned { + $name(cx.interners.$name.insert_full(self).0.try_into().unwrap()) + } + } + + impl std::ops::Index<$name> for Context { + type Output = $ty; + + fn index(&self, interned: $name) -> &Self::Output { + self.interners.$name.get_index(interned.0 as usize).unwrap() + } + } + )* + }; +} + +interners! { + IStr => String, +} + +impl InternInCx for &'_ str { + type Interned = IStr; + + fn intern_in_cx(self, cx: &mut Context) -> IStr { + // Avoid allocating if this string is already in the interner. + if let Some((i, _)) = cx.interners.IStr.get_full(self) { + return IStr(i.try_into().unwrap()); + } + + cx.intern(self.to_string()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 248f9bb..3de95b2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,15 @@ #![deny(rust_2018_idioms)] +use crate::context::{Context, IStr}; use indexmap::IndexMap; use std::collections::HashMap; use std::hash::Hash; +pub mod context; pub mod rule; pub struct Grammar { - pub rules: IndexMap>, + pub rules: IndexMap>, } impl Grammar { @@ -16,8 +18,8 @@ impl Grammar { rules: IndexMap::new(), } } - pub fn define(&mut self, name: &str, rule: rule::RuleWithNamedFields) { - self.rules.insert(name.to_string(), rule); + pub fn define(&mut self, name: IStr, rule: rule::RuleWithNamedFields) { + self.rules.insert(name, rule); } pub fn extend(&mut self, other: Self) { self.rules.extend(other.rules); @@ -37,9 +39,9 @@ impl Grammar { } impl Grammar { - pub fn check(&self) { + pub fn check(&self, cx: &Context) { for rule in self.rules.values() { - rule.rule.check_call_names(self); + rule.rule.check_call_names(cx, self); } let mut can_be_empty_cache = HashMap::new(); @@ -50,7 +52,7 @@ impl Grammar { } /// Construct a (meta-)grammar for parsing a grammar. -pub fn grammar_grammar() -> Grammar +pub fn grammar_grammar(cx: &mut Context) -> Grammar where Pat: Clone + From<&'static str>, { @@ -68,10 +70,10 @@ where negative_lookahead($start..=$end) }; ($rule:ident) => { - call(stringify!($rule)) + call(cx.intern(stringify!($rule))) }; ({ $name:ident : $rule:tt }) => { - rule!($rule).field(stringify!($name)) + rule!($rule).field(cx.intern(stringify!($name))) }; ({ $rule:tt ? }) => { rule!($rule).opt() @@ -99,7 +101,7 @@ where macro_rules! grammar { ($($rule_name:ident = $($rule:tt)|+;)*) => ({ let mut grammar = Grammar::new(); - $(grammar.define(stringify!($rule_name), rule!({ $($rule)|+ }));)* + $(grammar.define(cx.intern(stringify!($rule_name)), rule!({ $($rule)|+ }));)* grammar }) } @@ -133,58 +135,16 @@ where }; // Lexical fragment of the grammar. - let proc_macro = true; - if proc_macro { - grammar.extend(grammar! { - FileStart = ""; - FileEnd = ""; + grammar.extend(grammar! { + FileStart = ""; + FileEnd = ""; - Ident = IDENT; + Ident = IDENT; - // FIXME(eddyb) restrict literals, once `proc_macro` allows it. - StrLit = LITERAL; - CharLit = LITERAL; - }); - } else { - // HACK(eddyb) keeping the scannerless version around for posterity. - #[allow(unused)] - fn negative_lookahead(_pat: impl Into) -> RuleWithNamedFields { - unimplemented!() - } - fn _scannerless_lexical_grammar() -> Grammar - where - Pat: Clone - + From<&'static str> - + From> - + From, - { - grammar! { - Whitespace = { - {{ - " " | "\t" | "\n" | "\r" | - { "//" {{ {!"\n"} .. }*} "\n" } | - { "/*" {{ {!"*/"} .. }*} "*/" } - }*} - {!" "} {!"\t"} {!"\n"} {!"\r"} {!"//"} {!"/*"} - }; - Shebang = { "#!" {{ {!"\n"} .. }*} "\n" }; - FileStart = {Shebang?}; - FileEnd = Whitespace; - - IdentStart = {'a'..='z'} | {'A'..='Z'} | "_"; - IdentCont = IdentStart | {'0'..='9'}; - NotIdent = { {!'a'..='z'} {!'A'..='Z'} {!"_"} {!'0'..='9'} }; - Ident = { IdentStart {IdentCont*} NotIdent }; - - StrLit = { "\"" {{ { {!"\\"} {!"\""} .. } | { "\\" Escape } }*} "\"" }; - CharLit = { "'" { { {!"\\"} {!"'"} .. } | { "\\" Escape } } "'" }; - Escape = "t" | "n" | "r" | "\\" | "'" | "\""; - } - } - // grammar = grammar.insert_whitespace(call("Whitespace")); - // grammar.extend(_scannerless_lexical_grammar()); - unimplemented!() - } + // FIXME(eddyb) restrict literals, once `proc_macro` allows it. + StrLit = LITERAL; + CharLit = LITERAL; + }); grammar } diff --git a/src/rule.rs b/src/rule.rs index 2eca398..25fa2b9 100644 --- a/src/rule.rs +++ b/src/rule.rs @@ -1,3 +1,4 @@ +use crate::context::{Context, IStr}; use indexmap::{indexset, IndexMap, IndexSet}; use std::collections::hash_map::Entry; use std::collections::HashMap; @@ -9,7 +10,7 @@ use std::rc::Rc; #[derive(Clone)] pub struct RuleWithNamedFields { pub rule: Rc>, - pub fields: IndexMap>>, + pub fields: IndexMap>>, } pub fn empty() -> RuleWithNamedFields { @@ -24,15 +25,15 @@ pub fn eat(pat: impl Into) -> RuleWithNamedFields { fields: IndexMap::new(), } } -pub fn call(name: &str) -> RuleWithNamedFields { +pub fn call(name: IStr) -> RuleWithNamedFields { RuleWithNamedFields { - rule: Rc::new(Rule::Call(name.to_string())), + rule: Rc::new(Rule::Call(name)), fields: IndexMap::new(), } } impl RuleWithNamedFields { - pub fn field(mut self, name: &str) -> Self { + pub fn field(mut self, name: IStr) -> Self { let path = match &*self.rule { Rule::RepeatMany(rule, _) | Rule::RepeatMore(rule, _) => match **rule { Rule::Eat(_) | Rule::Call(_) => vec![], @@ -41,7 +42,7 @@ impl RuleWithNamedFields { Rule::Opt(_) => vec![0], _ => vec![], }; - self.fields.insert(name.to_string(), indexset![path]); + self.fields.insert(name, indexset![path]); self } pub fn opt(mut self) -> Self { @@ -145,7 +146,8 @@ impl Add for RuleWithNamedFields { }) .collect(); for (name, paths) in other.fields { - assert!(!self.fields.contains_key(&name), "duplicate field {}", name); + // FIXME(eddyb) uncomment once we have `Context` in scope. + // assert!(!self.fields.contains_key(&name), "duplicate field {}", cx[name]); self.fields.insert( name, paths @@ -206,7 +208,7 @@ pub enum SepKind { pub enum Rule { Empty, Eat(Pat), - Call(String), + Call(IStr), Concat([Rc>; 2]), Or(Vec>>), @@ -320,26 +322,30 @@ impl Rule { } } - pub(crate) fn check_call_names(&self, grammar: &crate::Grammar) { + pub(crate) fn check_call_names(&self, cx: &Context, grammar: &crate::Grammar) { match self { Rule::Empty | Rule::Eat(_) => {} Rule::Call(rule) => { - assert!(grammar.rules.contains_key(rule), "no rule named `{}`", rule); + assert!( + grammar.rules.contains_key(rule), + "no rule named `{}`", + cx[*rule] + ); } Rule::Concat([left, right]) => { - left.check_call_names(grammar); - right.check_call_names(grammar); + left.check_call_names(cx, grammar); + right.check_call_names(cx, grammar); } Rule::Or(rules) => { for rule in rules { - rule.check_call_names(grammar); + rule.check_call_names(cx, grammar); } } - Rule::Opt(rule) => rule.check_call_names(grammar), + Rule::Opt(rule) => rule.check_call_names(cx, grammar), Rule::RepeatMany(elem, sep) | Rule::RepeatMore(elem, sep) => { - elem.check_call_names(grammar); + elem.check_call_names(cx, grammar); if let Some((sep, _)) = sep { - sep.check_call_names(grammar); + sep.check_call_names(cx, grammar); } } } @@ -427,8 +433,8 @@ impl RuleWithNamedFields { fn filter_fields<'a>( &'a self, field: Option, - ) -> impl Iterator>)> + 'a { - self.fields.iter().filter_map(move |(name, paths)| { + ) -> impl Iterator>)> + 'a { + self.fields.iter().filter_map(move |(&name, paths)| { let paths: IndexSet<_> = paths .iter() .filter_map(move |path| { @@ -440,7 +446,7 @@ impl RuleWithNamedFields { }) .collect(); if !paths.is_empty() { - Some((name.clone(), paths)) + Some((name, paths)) } else { None } From c0d96692d1f94b6d8a98f24951fdc9c9a9107a78 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Thu, 16 May 2019 12:37:50 +0300 Subject: [PATCH 4/7] rule: introduce a `FieldPathset` to simplify building methods. --- src/rule.rs | 181 +++++++++++++++++++++------------------------------- 1 file changed, 74 insertions(+), 107 deletions(-) diff --git a/src/rule.rs b/src/rule.rs index 25fa2b9..ae403db 100644 --- a/src/rule.rs +++ b/src/rule.rs @@ -10,7 +10,24 @@ use std::rc::Rc; #[derive(Clone)] pub struct RuleWithNamedFields { pub rule: Rc>, - pub fields: IndexMap>>, + pub fields: IndexMap, +} + +#[derive(Clone, Default)] +pub struct FieldPathset(pub IndexSet>); + +impl FieldPathset { + fn prepend_all(self, i: usize) -> Self { + FieldPathset( + self.0 + .into_iter() + .map(|mut path| { + path.insert(0, i); + path + }) + .collect(), + ) + } } pub fn empty() -> RuleWithNamedFields { @@ -42,80 +59,50 @@ impl RuleWithNamedFields { Rule::Opt(_) => vec![0], _ => vec![], }; - self.fields.insert(name, indexset![path]); + self.fields.insert(name, FieldPathset(indexset![path])); self } - pub fn opt(mut self) -> Self { - self.fields = self - .fields - .into_iter() - .map(|(name, paths)| { - ( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 0); - path - }) - .collect(), - ) - }) - .collect(); - self.rule = Rc::new(Rule::Opt(self.rule)); - self + pub fn opt(self) -> Self { + RuleWithNamedFields { + rule: Rc::new(Rule::Opt(self.rule)), + fields: self + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), + } } - pub fn repeat_many(mut self, sep: Option<(Self, SepKind)>) -> Self { - self.fields = self - .fields - .into_iter() - .map(|(name, paths)| { - ( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 0); - path - }) - .collect(), - ) - }) - .collect(); + pub fn repeat_many(self, sep: Option<(Self, SepKind)>) -> Self { if let Some((sep, _)) = &sep { assert!(sep.fields.is_empty()); } - self.rule = Rc::new(Rule::RepeatMany( - self.rule, - sep.map(|(sep, kind)| (sep.rule, kind)), - )); - self + RuleWithNamedFields { + rule: Rc::new(Rule::RepeatMany( + self.rule, + sep.map(|(sep, kind)| (sep.rule, kind)), + )), + fields: self + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), + } } - pub fn repeat_more(mut self, sep: Option<(Self, SepKind)>) -> Self { - self.fields = self - .fields - .into_iter() - .map(|(name, paths)| { - ( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 0); - path - }) - .collect(), - ) - }) - .collect(); + pub fn repeat_more(self, sep: Option<(Self, SepKind)>) -> Self { if let Some((sep, _)) = &sep { assert!(sep.fields.is_empty()); } - self.rule = Rc::new(Rule::RepeatMore( - self.rule, - sep.map(|(sep, kind)| (sep.rule, kind)), - )); - self + RuleWithNamedFields { + rule: Rc::new(Rule::RepeatMore( + self.rule, + sep.map(|(sep, kind)| (sep.rule, kind)), + )), + fields: self + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), + } } } @@ -132,32 +119,12 @@ impl Add for RuleWithNamedFields { self.fields = self .fields .into_iter() - .map(|(name, paths)| { - ( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 0); - path - }) - .collect(), - ) - }) + .map(|(name, paths)| (name, paths.prepend_all(0))) .collect(); for (name, paths) in other.fields { // FIXME(eddyb) uncomment once we have `Context` in scope. // assert!(!self.fields.contains_key(&name), "duplicate field {}", cx[name]); - self.fields.insert( - name, - paths - .into_iter() - .map(|mut path| { - path.insert(0, 1); - path - }) - .collect(), - ); + self.fields.insert(name, paths.prepend_all(1)); } self.rule = Rc::new(Rule::Concat([self.rule, other.rule])); self @@ -173,22 +140,21 @@ impl BitOr for RuleWithNamedFields { _ => (&[][..], Some(self), IndexMap::new()), }; - let new_rules = - this.into_iter() - .chain(iter::once(other)) - .enumerate() - .map(|(i, rule)| { - for (name, paths) in rule.fields { - fields.entry(name).or_insert_with(IndexSet::new).extend( - paths.into_iter().map(|mut path| { - path.insert(0, old_rules.len() + i); - path - }), - ); - } + let new_rules = this + .into_iter() + .chain(iter::once(other)) + .enumerate() + .map(|(i, rule)| { + for (name, paths) in rule.fields { + fields + .entry(name) + .or_default() + .0 + .extend(paths.prepend_all(old_rules.len() + i).0); + } - rule.rule - }); + rule.rule + }); let rules = old_rules.iter().cloned().chain(new_rules).collect(); RuleWithNamedFields { @@ -219,11 +185,11 @@ pub enum Rule { } impl Rule { - pub fn field_pathset_is_refutable(&self, paths: &IndexSet>) -> bool { - if paths.len() > 1 { + pub fn field_pathset_is_refutable(&self, paths: &FieldPathset) -> bool { + if paths.0.len() > 1 { true } else { - self.field_is_refutable(paths.get_index(0).unwrap()) + self.field_is_refutable(paths.0.get_index(0).unwrap()) } } pub fn field_is_refutable(&self, path: &[usize]) -> bool { @@ -433,9 +399,10 @@ impl RuleWithNamedFields { fn filter_fields<'a>( &'a self, field: Option, - ) -> impl Iterator>)> + 'a { + ) -> impl Iterator + 'a { self.fields.iter().filter_map(move |(&name, paths)| { let paths: IndexSet<_> = paths + .0 .iter() .filter_map(move |path| { if path.first().cloned() == field { @@ -446,7 +413,7 @@ impl RuleWithNamedFields { }) .collect(); if !paths.is_empty() { - Some((name, paths)) + Some((name, FieldPathset(paths))) } else { None } From 4165cdbd1f6c1b5e9204dea7dfeb1c1e52ef83e2 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Thu, 16 May 2019 12:48:02 +0300 Subject: [PATCH 5/7] rule: split `repeat_{many,more}` builder methods into plain and `_sep` versions. --- src/lib.rs | 6 ++-- src/rule.rs | 84 ++++++++++++++++++++++++++++++++--------------------- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3de95b2..7036781 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,13 +79,13 @@ where rule!($rule).opt() }; ({ $elem:tt * }) => { - rule!($elem).repeat_many(None) + rule!($elem).repeat_many() }; ({ $elem:tt + }) => { - rule!($elem).repeat_more(None) + rule!($elem).repeat_more() }; ({ $elem:tt + % $sep:tt }) => { - rule!($elem).repeat_more(Some((rule!($sep), SepKind::Simple))) + rule!($elem).repeat_more_sep(rule!($sep), SepKind::Simple) }; ({ $rule0:tt $(| $rule:tt)+ }) => { rule!($rule0) $(| rule!($rule))+ diff --git a/src/rule.rs b/src/rule.rs index ae403db..5c3b26f 100644 --- a/src/rule.rs +++ b/src/rule.rs @@ -72,15 +72,20 @@ impl RuleWithNamedFields { .collect(), } } - pub fn repeat_many(self, sep: Option<(Self, SepKind)>) -> Self { - if let Some((sep, _)) = &sep { - assert!(sep.fields.is_empty()); + pub fn repeat_many(self) -> Self { + RuleWithNamedFields { + rule: Rc::new(Rule::RepeatMany(self.rule, None)), + fields: self + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), } + } + pub fn repeat_many_sep(self, sep: Self, kind: SepKind) -> Self { + assert!(sep.fields.is_empty()); RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMany( - self.rule, - sep.map(|(sep, kind)| (sep.rule, kind)), - )), + rule: Rc::new(Rule::RepeatMany(self.rule, Some((sep.rule, kind)))), fields: self .fields .into_iter() @@ -88,15 +93,20 @@ impl RuleWithNamedFields { .collect(), } } - pub fn repeat_more(self, sep: Option<(Self, SepKind)>) -> Self { - if let Some((sep, _)) = &sep { - assert!(sep.fields.is_empty()); + pub fn repeat_more(self) -> Self { + RuleWithNamedFields { + rule: Rc::new(Rule::RepeatMore(self.rule, None)), + fields: self + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), } + } + pub fn repeat_more_sep(self, sep: Self, kind: SepKind) -> Self { + assert!(sep.fields.is_empty()); RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMore( - self.rule, - sep.map(|(sep, kind)| (sep.rule, kind)), - )), + rule: Rc::new(Rule::RepeatMore(self.rule, Some((sep.rule, kind)))), fields: self .fields .into_iter() @@ -381,16 +391,24 @@ pub trait Folder: Sized { elem: RuleWithNamedFields, sep: Option<(RuleWithNamedFields, SepKind)>, ) -> RuleWithNamedFields { - elem.fold(self) - .repeat_many(sep.map(|(sep, kind)| (sep.fold(self), kind))) + let elem = elem.fold(self); + let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); + match sep { + None => elem.repeat_many(), + Some((sep, kind)) => elem.repeat_many_sep(sep, kind), + } } fn fold_repeat_more( &mut self, elem: RuleWithNamedFields, sep: Option<(RuleWithNamedFields, SepKind)>, ) -> RuleWithNamedFields { - elem.fold(self) - .repeat_more(sep.map(|(sep, kind)| (sep.fold(self), kind))) + let elem = elem.fold(self); + let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); + match sep { + None => elem.repeat_more(), + Some((sep, kind)) => elem.repeat_more_sep(sep, kind), + } } } @@ -477,23 +495,23 @@ impl RuleWithNamedFields { elem: RuleWithNamedFields, sep: Option<(RuleWithNamedFields, SepKind)>, ) -> RuleWithNamedFields { + let elem = elem.fold(self); + let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { // A* => A* % WS - None => elem - .fold(self) - .repeat_more(Some((self.whitespace.clone(), SepKind::Simple))), + None => elem.repeat_more_sep(self.whitespace.clone(), SepKind::Simple), // A* % B => A* % (WS B WS) - Some((sep, SepKind::Simple)) => elem.fold(self).repeat_more(Some(( + Some((sep, SepKind::Simple)) => elem.repeat_more_sep( self.whitespace.clone() + sep + self.whitespace.clone(), SepKind::Simple, - ))), + ), // FIXME(cad97) this will insert too many whitespace rules // A* %% B => ??? // Currently, A* %% (WS B WS), which allows trailing whitespace incorrectly - Some((sep, SepKind::Trailing)) => elem.fold(self).repeat_more(Some(( - self.whitespace.clone() + sep.clone() + self.whitespace.clone(), + Some((sep, SepKind::Trailing)) => elem.repeat_more_sep( + self.whitespace.clone() + sep + self.whitespace.clone(), SepKind::Trailing, - ))), + ), } } fn fold_repeat_more( @@ -501,22 +519,22 @@ impl RuleWithNamedFields { elem: RuleWithNamedFields, sep: Option<(RuleWithNamedFields, SepKind)>, ) -> RuleWithNamedFields { + let elem = elem.fold(self); + let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { // A+ => A+ % WS - None => elem - .fold(self) - .repeat_more(Some((self.whitespace.clone(), SepKind::Simple))), + None => elem.repeat_more_sep(self.whitespace.clone(), SepKind::Simple), // A+ % B => A+ % (WS B WS) - Some((sep, SepKind::Simple)) => elem.fold(self).repeat_more(Some(( + Some((sep, SepKind::Simple)) => elem.fold(self).repeat_more_sep( self.whitespace.clone() + sep + self.whitespace.clone(), SepKind::Simple, - ))), + ), // A+ %% B => A+ % (WS B WS) (WS B)? Some((sep, SepKind::Trailing)) => { - elem.fold(self).repeat_more(Some(( + elem.repeat_more_sep( self.whitespace.clone() + sep.clone() + self.whitespace.clone(), SepKind::Simple, - ))) + (self.whitespace.clone() + sep).opt() + ) + (self.whitespace.clone() + sep).opt() } } } From d10ea9df752e3d55043610fdeffcdc80f19ecc96 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Thu, 16 May 2019 14:53:07 +0300 Subject: [PATCH 6/7] rule: put all the rule-building methods/operators behind a "build API". --- src/lib.rs | 17 +- src/rule.rs | 492 ++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 359 insertions(+), 150 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7036781..5475769 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,7 +24,11 @@ impl Grammar { pub fn extend(&mut self, other: Self) { self.rules.extend(other.rules); } - pub fn insert_whitespace(self, whitespace: rule::RuleWithNamedFields) -> Self + pub fn insert_whitespace( + self, + cx: &mut Context, + whitespace: rule::RuleWithNamedFields, + ) -> Self where Pat: Clone, { @@ -32,7 +36,7 @@ impl Grammar { rules: self .rules .into_iter() - .map(|(name, rule)| (name, rule.insert_whitespace(whitespace.clone()))) + .map(|(name, rule)| (name, rule.insert_whitespace(cx, whitespace.clone()))) .collect(), } } @@ -70,10 +74,10 @@ where negative_lookahead($start..=$end) }; ($rule:ident) => { - call(cx.intern(stringify!($rule))) + call(stringify!($rule)) }; ({ $name:ident : $rule:tt }) => { - rule!($rule).field(cx.intern(stringify!($name))) + rule!($rule).field(stringify!($name)) }; ({ $rule:tt ? }) => { rule!($rule).opt() @@ -101,7 +105,10 @@ where macro_rules! grammar { ($($rule_name:ident = $($rule:tt)|+;)*) => ({ let mut grammar = Grammar::new(); - $(grammar.define(cx.intern(stringify!($rule_name)), rule!({ $($rule)|+ }));)* + $(grammar.define( + cx.intern(stringify!($rule_name)), + rule!({ $($rule)|+ }).finish(cx), + );)* grammar }) } diff --git a/src/rule.rs b/src/rule.rs index 5c3b26f..7ba2011 100644 --- a/src/rule.rs +++ b/src/rule.rs @@ -30,150 +30,334 @@ impl FieldPathset { } } -pub fn empty() -> RuleWithNamedFields { - RuleWithNamedFields { - rule: Rc::new(Rule::Empty), - fields: IndexMap::new(), +/// Helpers for building rules without needing a `Context` until the very end. +/// +/// NOTE: the module is private to disallow referring to the trait / types, +/// as they are an implementation detail of the builder methods and operators. +mod build { + use super::*; + + // HACK(eddyb) like `Into` but using an associated type. + // Needed for constraining the RHS of operator overload impls. + pub trait Start { + type Out; + + fn start(self) -> Self::Out; } -} -pub fn eat(pat: impl Into) -> RuleWithNamedFields { - RuleWithNamedFields { - rule: Rc::new(Rule::Eat(pat.into())), - fields: IndexMap::new(), + + impl Start for RuleWithNamedFields { + type Out = RuleWithNamedFields; + + fn start(self) -> Self::Out { + self + } } -} -pub fn call(name: IStr) -> RuleWithNamedFields { - RuleWithNamedFields { - rule: Rc::new(Rule::Call(name)), - fields: IndexMap::new(), + + pub trait Finish { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields; } -} -impl RuleWithNamedFields { - pub fn field(mut self, name: IStr) -> Self { - let path = match &*self.rule { - Rule::RepeatMany(rule, _) | Rule::RepeatMore(rule, _) => match **rule { - Rule::Eat(_) | Rule::Call(_) => vec![], - _ => unimplemented!(), - }, - Rule::Opt(_) => vec![0], - _ => vec![], - }; - self.fields.insert(name, FieldPathset(indexset![path])); - self + impl Finish for RuleWithNamedFields { + fn finish(self, _cx: &mut Context) -> RuleWithNamedFields { + self + } } - pub fn opt(self) -> Self { - RuleWithNamedFields { - rule: Rc::new(Rule::Opt(self.rule)), - fields: self - .fields - .into_iter() - .map(|(name, paths)| (name, paths.prepend_all(0))) - .collect(), + + pub struct Empty; + + impl Finish for Empty { + fn finish(self, _cx: &mut Context) -> RuleWithNamedFields { + RuleWithNamedFields { + rule: Rc::new(Rule::Empty), + fields: IndexMap::new(), + } } } - pub fn repeat_many(self) -> Self { - RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMany(self.rule, None)), - fields: self - .fields - .into_iter() - .map(|(name, paths)| (name, paths.prepend_all(0))) - .collect(), + + pub struct Eat(Pat); + + impl Finish for Eat { + fn finish(self, _cx: &mut Context) -> RuleWithNamedFields { + RuleWithNamedFields { + rule: Rc::new(Rule::Eat(self.0)), + fields: IndexMap::new(), + } } } - pub fn repeat_many_sep(self, sep: Self, kind: SepKind) -> Self { - assert!(sep.fields.is_empty()); - RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMany(self.rule, Some((sep.rule, kind)))), - fields: self - .fields - .into_iter() - .map(|(name, paths)| (name, paths.prepend_all(0))) - .collect(), + + pub struct Call<'a>(&'a str); + + impl Finish for Call<'_> { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + RuleWithNamedFields { + rule: Rc::new(Rule::Call(cx.intern(self.0))), + fields: IndexMap::new(), + } + } + } + + pub struct Field<'a, R>(R, &'a str); + + impl> Finish for Field<'_, R> { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let mut rule = self.0.finish(cx); + let name = cx.intern(self.1); + let path = match &*rule.rule { + Rule::RepeatMany(rule, _) | Rule::RepeatMore(rule, _) => match **rule { + Rule::Eat(_) | Rule::Call(_) => vec![], + _ => unimplemented!(), + }, + Rule::Opt(_) => vec![0], + _ => vec![], + }; + rule.fields.insert(name, FieldPathset(indexset![path])); + rule } } - pub fn repeat_more(self) -> Self { - RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMore(self.rule, None)), - fields: self + + pub struct Opt(R); + + impl> Finish for Opt { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let rule = self.0.finish(cx); + RuleWithNamedFields { + rule: Rc::new(Rule::Opt(rule.rule)), + fields: rule + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), + } + } + } + + pub struct RepeatMany(E); + + impl> Finish for RepeatMany { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let elem = self.0.finish(cx); + RuleWithNamedFields { + rule: Rc::new(Rule::RepeatMany(elem.rule, None)), + fields: elem + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), + } + } + } + + pub struct RepeatManySep(E, S, SepKind); + + impl, S: Finish> Finish for RepeatManySep { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let elem = self.0.finish(cx); + let sep = self.1.finish(cx); + assert!(sep.fields.is_empty()); + RuleWithNamedFields { + rule: Rc::new(Rule::RepeatMany(elem.rule, Some((sep.rule, self.2)))), + fields: elem + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), + } + } + } + + pub struct RepeatMore(E); + + impl> Finish for RepeatMore { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let elem = self.0.finish(cx); + RuleWithNamedFields { + rule: Rc::new(Rule::RepeatMore(elem.rule, None)), + fields: elem + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), + } + } + } + + pub struct RepeatMoreSep(E, S, SepKind); + + impl, S: Finish> Finish for RepeatMoreSep { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let elem = self.0.finish(cx); + let sep = self.1.finish(cx); + assert!(sep.fields.is_empty()); + RuleWithNamedFields { + rule: Rc::new(Rule::RepeatMore(elem.rule, Some((sep.rule, self.2)))), + fields: elem + .fields + .into_iter() + .map(|(name, paths)| (name, paths.prepend_all(0))) + .collect(), + } + } + } + + pub struct Concat(A, B); + + impl, B: Finish> Finish for Concat { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let a = self.0.finish(cx); + let b = self.1.finish(cx); + + match (&*a.rule, &*b.rule) { + (Rule::Empty, _) if a.fields.is_empty() => return b, + (_, Rule::Empty) if b.fields.is_empty() => return a, + _ => {} + } + + let mut fields: IndexMap<_, _> = a .fields .into_iter() .map(|(name, paths)| (name, paths.prepend_all(0))) - .collect(), + .collect(); + for (name, paths) in b.fields { + assert!(!fields.contains_key(&name), "duplicate field {}", cx[name]); + fields.insert(name, paths.prepend_all(1)); + } + RuleWithNamedFields { + rule: Rc::new(Rule::Concat([a.rule, b.rule])), + fields, + } } } - pub fn repeat_more_sep(self, sep: Self, kind: SepKind) -> Self { - assert!(sep.fields.is_empty()); - RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMore(self.rule, Some((sep.rule, kind)))), - fields: self - .fields + + pub struct Or(A, B); + + impl, B: Finish> Finish for Or { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let a = self.0.finish(cx); + let b = self.1.finish(cx); + + let (old_rules, a, mut fields) = match &*a.rule { + Rule::Or(rules) => (&rules[..], None, a.fields), + _ => (&[][..], Some(a), IndexMap::new()), + }; + + let new_rules = a .into_iter() - .map(|(name, paths)| (name, paths.prepend_all(0))) - .collect(), + .chain(iter::once(b)) + .enumerate() + .map(|(i, rule)| { + for (name, paths) in rule.fields { + fields + .entry(name) + .or_default() + .0 + .extend(paths.prepend_all(old_rules.len() + i).0); + } + + rule.rule + }); + let rules = old_rules.iter().cloned().chain(new_rules).collect(); + + RuleWithNamedFields { + rule: Rc::new(Rule::Or(rules)), + fields, + } } } -} -impl Add for RuleWithNamedFields { - type Output = Self; + /// Wrapper for building rules, to allow overloading operators uniformly. + pub struct Build(R); + + impl Start for Build { + type Out = R; - fn add(mut self, other: Self) -> Self { - match (&*self.rule, &*other.rule) { - (Rule::Empty, _) if self.fields.is_empty() => return other, - (_, Rule::Empty) if other.fields.is_empty() => return self, - _ => {} + fn start(self) -> R { + self.0 } + } - self.fields = self - .fields - .into_iter() - .map(|(name, paths)| (name, paths.prepend_all(0))) - .collect(); - for (name, paths) in other.fields { - // FIXME(eddyb) uncomment once we have `Context` in scope. - // assert!(!self.fields.contains_key(&name), "duplicate field {}", cx[name]); - self.fields.insert(name, paths.prepend_all(1)); + impl Build { + pub fn finish(self, cx: &mut Context) -> RuleWithNamedFields + where + R: Finish, + { + Finish::finish(self.0, cx) } - self.rule = Rc::new(Rule::Concat([self.rule, other.rule])); - self } -} -impl BitOr for RuleWithNamedFields { - type Output = Self; + pub fn empty() -> build::Build { + build::Build(build::Empty) + } - fn bitor(self, other: Self) -> Self { - let (old_rules, this, mut fields) = match &*self.rule { - Rule::Or(rules) => (&rules[..], None, self.fields), - _ => (&[][..], Some(self), IndexMap::new()), - }; + pub fn eat(pat: impl Into) -> build::Build> { + build::Build(build::Eat(pat.into())) + } - let new_rules = this - .into_iter() - .chain(iter::once(other)) - .enumerate() - .map(|(i, rule)| { - for (name, paths) in rule.fields { - fields - .entry(name) - .or_default() - .0 - .extend(paths.prepend_all(old_rules.len() + i).0); + pub fn call(name: &str) -> build::Build> { + build::Build(build::Call(name)) + } + + /// Helper macro to provide methods and operator overloads on both + /// `RuleWithNamedFields` and `Build`, instead of just one of them. + macro_rules! builder_impls { + (impl<$($g:ident),*> $Self:ty) => { + impl<$($g),*> $Self { + pub fn field<'a>(self, name: &'a str) -> Build::Out>> { + Build(Field(self.start(), name)) } - rule.rule - }); - let rules = old_rules.iter().cloned().chain(new_rules).collect(); + pub fn opt(self) -> Build::Out>> { + Build(Opt(self.start())) + } - RuleWithNamedFields { - rule: Rc::new(Rule::Or(rules)), - fields, - } + pub fn repeat_many(self) -> Build::Out>> { + Build(RepeatMany(self.start())) + } + + pub fn repeat_many_sep( + self, + sep: S, + kind: SepKind, + ) -> Build::Out, S::Out>> { + Build(RepeatManySep(self.start(), sep.start(), kind)) + } + + pub fn repeat_more(self) -> Build::Out>> { + Build(RepeatMore(self.start())) + } + + pub fn repeat_more_sep( + self, + sep: S, + kind: SepKind, + ) -> Build::Out, S::Out>> { + Build(RepeatMoreSep(self.start(), sep.start(), kind)) + } + } + + impl<$($g,)* Other: Start> Add for $Self { + type Output = Build::Out, Other::Out>>; + + fn add(self, other: Other) -> Self::Output { + Build(Concat(self.start(), other.start())) + } + } + + impl<$($g,)* Other: Start> BitOr for $Self { + type Output = Build::Out, Other::Out>>; + + fn bitor(self, other: Other) -> Self::Output { + Build(Or(self.start(), other.start())) + } + } + }; } + + builder_impls!(impl Build); + builder_impls!(impl RuleWithNamedFields); } +pub use self::build::{call, eat, empty}; + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum SepKind { Simple, @@ -365,6 +549,7 @@ pub trait MatchesEmpty { } pub trait Folder: Sized { + fn cx(&mut self) -> &mut Context; fn fold_leaf(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { rule } @@ -373,18 +558,17 @@ pub trait Folder: Sized { left: RuleWithNamedFields, right: RuleWithNamedFields, ) -> RuleWithNamedFields { - left.fold(self) + right.fold(self) + (left.fold(self) + right.fold(self)).finish(self.cx()) } fn fold_or( &mut self, - rules: impl Iterator>, + mut rules: impl Iterator>, ) -> RuleWithNamedFields { - let mut rules = rules.map(|rule| rule.fold(self)); - let first = rules.next().unwrap(); - rules.fold(first, |or, rule| or | rule) + let first = rules.next().unwrap().fold(self); + rules.fold(first, |or, rule| (or | rule.fold(self)).finish(self.cx())) } fn fold_opt(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { - rule.fold(self).opt() + rule.fold(self).opt().finish(self.cx()) } fn fold_repeat_many( &mut self, @@ -394,8 +578,8 @@ pub trait Folder: Sized { let elem = elem.fold(self); let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { - None => elem.repeat_many(), - Some((sep, kind)) => elem.repeat_many_sep(sep, kind), + None => elem.repeat_many().finish(self.cx()), + Some((sep, kind)) => elem.repeat_many_sep(sep, kind).finish(self.cx()), } } fn fold_repeat_more( @@ -406,8 +590,8 @@ pub trait Folder: Sized { let elem = elem.fold(self); let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { - None => elem.repeat_more(), - Some((sep, kind)) => elem.repeat_more_sep(sep, kind), + None => elem.repeat_more().finish(self.cx()), + Some((sep, kind)) => elem.repeat_more_sep(sep, kind).finish(self.cx()), } } } @@ -468,17 +652,25 @@ impl RuleWithNamedFields { rule } - pub fn insert_whitespace(self, whitespace: RuleWithNamedFields) -> Self + pub fn insert_whitespace( + self, + cx: &mut Context, + whitespace: RuleWithNamedFields, + ) -> Self where Pat: Clone, { assert!(whitespace.fields.is_empty()); - struct WhitespaceInserter { + struct WhitespaceInserter<'a, Pat> { + cx: &'a mut Context, whitespace: RuleWithNamedFields, } - impl Folder for WhitespaceInserter { + impl Folder for WhitespaceInserter<'_, Pat> { + fn cx(&mut self) -> &mut Context { + self.cx + } // FIXME(eddyb) this will insert too many whitespace rules, // e.g. `A B? C` becomes `A WS B? WS C`, which when `B` is // missing, is `A WS WS C`. Even worse, `A? B` ends up as @@ -488,7 +680,7 @@ impl RuleWithNamedFields { left: RuleWithNamedFields, right: RuleWithNamedFields, ) -> RuleWithNamedFields { - left.fold(self) + self.whitespace.clone() + right.fold(self) + (left.fold(self) + self.whitespace.clone() + right.fold(self)).finish(self.cx()) } fn fold_repeat_many( &mut self, @@ -499,19 +691,25 @@ impl RuleWithNamedFields { let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { // A* => A* % WS - None => elem.repeat_more_sep(self.whitespace.clone(), SepKind::Simple), + None => elem + .repeat_more_sep(self.whitespace.clone(), SepKind::Simple) + .finish(self.cx), // A* % B => A* % (WS B WS) - Some((sep, SepKind::Simple)) => elem.repeat_more_sep( - self.whitespace.clone() + sep + self.whitespace.clone(), - SepKind::Simple, - ), + Some((sep, SepKind::Simple)) => elem + .repeat_more_sep( + self.whitespace.clone() + sep + self.whitespace.clone(), + SepKind::Simple, + ) + .finish(self.cx), // FIXME(cad97) this will insert too many whitespace rules // A* %% B => ??? // Currently, A* %% (WS B WS), which allows trailing whitespace incorrectly - Some((sep, SepKind::Trailing)) => elem.repeat_more_sep( - self.whitespace.clone() + sep + self.whitespace.clone(), - SepKind::Trailing, - ), + Some((sep, SepKind::Trailing)) => elem + .repeat_more_sep( + self.whitespace.clone() + sep + self.whitespace.clone(), + SepKind::Trailing, + ) + .finish(self.cx), } } fn fold_repeat_more( @@ -523,23 +721,27 @@ impl RuleWithNamedFields { let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { // A+ => A+ % WS - None => elem.repeat_more_sep(self.whitespace.clone(), SepKind::Simple), + None => elem + .repeat_more_sep(self.whitespace.clone(), SepKind::Simple) + .finish(self.cx), // A+ % B => A+ % (WS B WS) - Some((sep, SepKind::Simple)) => elem.fold(self).repeat_more_sep( - self.whitespace.clone() + sep + self.whitespace.clone(), - SepKind::Simple, - ), - // A+ %% B => A+ % (WS B WS) (WS B)? - Some((sep, SepKind::Trailing)) => { - elem.repeat_more_sep( - self.whitespace.clone() + sep.clone() + self.whitespace.clone(), + Some((sep, SepKind::Simple)) => elem + .fold(self) + .repeat_more_sep( + self.whitespace.clone() + sep + self.whitespace.clone(), SepKind::Simple, - ) + (self.whitespace.clone() + sep).opt() - } + ) + .finish(self.cx), + // A+ %% B => A+ % (WS B WS) (WS B)? + Some((sep, SepKind::Trailing)) => (elem.repeat_more_sep( + self.whitespace.clone() + sep.clone() + self.whitespace.clone(), + SepKind::Simple, + ) + (self.whitespace.clone() + sep).opt()) + .finish(self.cx), } } } - self.fold(&mut WhitespaceInserter { whitespace }) + self.fold(&mut WhitespaceInserter { cx, whitespace }) } } From 00b27f5f89132c00ff239e1f0975fcb2e4b243c4 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Thu, 16 May 2019 17:16:39 +0300 Subject: [PATCH 7/7] Intern `Rule`s as well. --- src/context.rs | 13 +- src/lib.rs | 29 ++--- src/rule.rs | 313 ++++++++++++++++++++++++------------------------- 3 files changed, 169 insertions(+), 186 deletions(-) diff --git a/src/context.rs b/src/context.rs index 111a5e5..4eeaf9f 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,6 +1,6 @@ use indexmap::IndexSet; use std::convert::TryInto; -use std::marker::PhantomData; +use std::hash::Hash; /// Context object with global resources for working with grammar, /// such as interners. @@ -32,15 +32,13 @@ macro_rules! interners { ($($name:ident => $ty:ty),* $(,)?) => { #[allow(non_snake_case)] struct Interners { - $($name: IndexSet<$ty>,)* - _marker: PhantomData, + $($name: IndexSet<$ty>),* } impl Default for Interners { fn default() -> Self { Interners { - $($name: IndexSet::new(),)* - _marker: PhantomData, + $($name: IndexSet::new()),* } } } @@ -49,7 +47,7 @@ macro_rules! interners { #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct $name(u32); - impl InternInCx for $ty { + impl InternInCx for $ty { type Interned = $name; fn intern_in_cx(self, cx: &mut Context) -> Self::Interned { @@ -70,9 +68,10 @@ macro_rules! interners { interners! { IStr => String, + IRule => crate::rule::Rule, } -impl InternInCx for &'_ str { +impl InternInCx for &'_ str { type Interned = IStr; fn intern_in_cx(self, cx: &mut Context) -> IStr { diff --git a/src/lib.rs b/src/lib.rs index 5475769..32f8bfc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,30 +8,27 @@ use std::hash::Hash; pub mod context; pub mod rule; -pub struct Grammar { - pub rules: IndexMap>, +pub struct Grammar { + pub rules: IndexMap, } -impl Grammar { +impl Grammar { pub fn new() -> Self { Grammar { rules: IndexMap::new(), } } - pub fn define(&mut self, name: IStr, rule: rule::RuleWithNamedFields) { + pub fn define(&mut self, name: IStr, rule: rule::RuleWithNamedFields) { self.rules.insert(name, rule); } pub fn extend(&mut self, other: Self) { self.rules.extend(other.rules); } - pub fn insert_whitespace( + pub fn insert_whitespace( self, cx: &mut Context, - whitespace: rule::RuleWithNamedFields, - ) -> Self - where - Pat: Clone, - { + whitespace: rule::RuleWithNamedFields, + ) -> Self { Grammar { rules: self .rules @@ -42,24 +39,22 @@ impl Grammar { } } -impl Grammar { - pub fn check(&self, cx: &Context) { +impl Grammar { + pub fn check(&self, cx: &Context) { for rule in self.rules.values() { rule.rule.check_call_names(cx, self); } let mut can_be_empty_cache = HashMap::new(); for rule in self.rules.values() { - rule.rule.check_non_empty_opt(&mut can_be_empty_cache, self); + rule.rule + .check_non_empty_opt(&mut can_be_empty_cache, cx, self); } } } /// Construct a (meta-)grammar for parsing a grammar. -pub fn grammar_grammar(cx: &mut Context) -> Grammar -where - Pat: Clone + From<&'static str>, -{ +pub fn grammar_grammar>(cx: &mut Context) -> Grammar { use crate::rule::*; // HACK(eddyb) more explicit subset of the grammar, for bootstrapping. diff --git a/src/rule.rs b/src/rule.rs index 7ba2011..78fd9fa 100644 --- a/src/rule.rs +++ b/src/rule.rs @@ -1,15 +1,14 @@ -use crate::context::{Context, IStr}; +use crate::context::{Context, IRule, IStr}; use indexmap::{indexset, IndexMap, IndexSet}; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::hash::Hash; use std::iter; use std::ops::{Add, BitAnd, BitOr}; -use std::rc::Rc; #[derive(Clone)] -pub struct RuleWithNamedFields { - pub rule: Rc>, +pub struct RuleWithNamedFields { + pub rule: IRule, pub fields: IndexMap, } @@ -45,8 +44,8 @@ mod build { fn start(self) -> Self::Out; } - impl Start for RuleWithNamedFields { - type Out = RuleWithNamedFields; + impl Start for RuleWithNamedFields { + type Out = RuleWithNamedFields; fn start(self) -> Self::Out { self @@ -54,21 +53,21 @@ mod build { } pub trait Finish { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields; + fn finish(self, cx: &mut Context) -> RuleWithNamedFields; } - impl Finish for RuleWithNamedFields { - fn finish(self, _cx: &mut Context) -> RuleWithNamedFields { + impl Finish for RuleWithNamedFields { + fn finish(self, _cx: &mut Context) -> RuleWithNamedFields { self } } pub struct Empty; - impl Finish for Empty { - fn finish(self, _cx: &mut Context) -> RuleWithNamedFields { + impl Finish for Empty { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { RuleWithNamedFields { - rule: Rc::new(Rule::Empty), + rule: cx.intern(Rule::Empty), fields: IndexMap::new(), } } @@ -76,10 +75,10 @@ mod build { pub struct Eat(Pat); - impl Finish for Eat { - fn finish(self, _cx: &mut Context) -> RuleWithNamedFields { + impl Finish for Eat { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { RuleWithNamedFields { - rule: Rc::new(Rule::Eat(self.0)), + rule: cx.intern(Rule::Eat(self.0)), fields: IndexMap::new(), } } @@ -87,10 +86,11 @@ mod build { pub struct Call<'a>(&'a str); - impl Finish for Call<'_> { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl Finish for Call<'_> { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + let name = cx.intern(self.0); RuleWithNamedFields { - rule: Rc::new(Rule::Call(cx.intern(self.0))), + rule: cx.intern(Rule::Call(name)), fields: IndexMap::new(), } } @@ -98,12 +98,12 @@ mod build { pub struct Field<'a, R>(R, &'a str); - impl> Finish for Field<'_, R> { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl> Finish for Field<'_, R> { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { let mut rule = self.0.finish(cx); let name = cx.intern(self.1); - let path = match &*rule.rule { - Rule::RepeatMany(rule, _) | Rule::RepeatMore(rule, _) => match **rule { + let path = match cx[rule.rule] { + Rule::RepeatMany(rule, _) | Rule::RepeatMore(rule, _) => match cx[rule] { Rule::Eat(_) | Rule::Call(_) => vec![], _ => unimplemented!(), }, @@ -117,11 +117,11 @@ mod build { pub struct Opt(R); - impl> Finish for Opt { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl> Finish for Opt { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { let rule = self.0.finish(cx); RuleWithNamedFields { - rule: Rc::new(Rule::Opt(rule.rule)), + rule: cx.intern(Rule::Opt(rule.rule)), fields: rule .fields .into_iter() @@ -133,11 +133,11 @@ mod build { pub struct RepeatMany(E); - impl> Finish for RepeatMany { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl> Finish for RepeatMany { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { let elem = self.0.finish(cx); RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMany(elem.rule, None)), + rule: cx.intern(Rule::RepeatMany(elem.rule, None)), fields: elem .fields .into_iter() @@ -149,13 +149,13 @@ mod build { pub struct RepeatManySep(E, S, SepKind); - impl, S: Finish> Finish for RepeatManySep { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl, S: Finish> Finish for RepeatManySep { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { let elem = self.0.finish(cx); let sep = self.1.finish(cx); assert!(sep.fields.is_empty()); RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMany(elem.rule, Some((sep.rule, self.2)))), + rule: cx.intern(Rule::RepeatMany(elem.rule, Some((sep.rule, self.2)))), fields: elem .fields .into_iter() @@ -167,11 +167,11 @@ mod build { pub struct RepeatMore(E); - impl> Finish for RepeatMore { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl> Finish for RepeatMore { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { let elem = self.0.finish(cx); RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMore(elem.rule, None)), + rule: cx.intern(Rule::RepeatMore(elem.rule, None)), fields: elem .fields .into_iter() @@ -183,13 +183,13 @@ mod build { pub struct RepeatMoreSep(E, S, SepKind); - impl, S: Finish> Finish for RepeatMoreSep { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl, S: Finish> Finish for RepeatMoreSep { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { let elem = self.0.finish(cx); let sep = self.1.finish(cx); assert!(sep.fields.is_empty()); RuleWithNamedFields { - rule: Rc::new(Rule::RepeatMore(elem.rule, Some((sep.rule, self.2)))), + rule: cx.intern(Rule::RepeatMore(elem.rule, Some((sep.rule, self.2)))), fields: elem .fields .into_iter() @@ -201,12 +201,12 @@ mod build { pub struct Concat(A, B); - impl, B: Finish> Finish for Concat { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl, B: Finish> Finish for Concat { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { let a = self.0.finish(cx); let b = self.1.finish(cx); - match (&*a.rule, &*b.rule) { + match (&cx[a.rule], &cx[b.rule]) { (Rule::Empty, _) if a.fields.is_empty() => return b, (_, Rule::Empty) if b.fields.is_empty() => return a, _ => {} @@ -222,7 +222,7 @@ mod build { fields.insert(name, paths.prepend_all(1)); } RuleWithNamedFields { - rule: Rc::new(Rule::Concat([a.rule, b.rule])), + rule: cx.intern(Rule::Concat([a.rule, b.rule])), fields, } } @@ -230,13 +230,13 @@ mod build { pub struct Or(A, B); - impl, B: Finish> Finish for Or { - fn finish(self, cx: &mut Context) -> RuleWithNamedFields { + impl, B: Finish> Finish for Or { + fn finish(self, cx: &mut Context) -> RuleWithNamedFields { let a = self.0.finish(cx); let b = self.1.finish(cx); - let (old_rules, a, mut fields) = match &*a.rule { - Rule::Or(rules) => (&rules[..], None, a.fields), + let (old_rules, a, mut fields) = match cx[a.rule] { + Rule::Or(ref rules) => (&rules[..], None, a.fields), _ => (&[][..], Some(a), IndexMap::new()), }; @@ -258,7 +258,7 @@ mod build { let rules = old_rules.iter().cloned().chain(new_rules).collect(); RuleWithNamedFields { - rule: Rc::new(Rule::Or(rules)), + rule: cx.intern(Rule::Or(rules)), fields, } } @@ -276,7 +276,7 @@ mod build { } impl Build { - pub fn finish(self, cx: &mut Context) -> RuleWithNamedFields + pub fn finish(self, cx: &mut Context) -> RuleWithNamedFields where R: Finish, { @@ -353,7 +353,7 @@ mod build { } builder_impls!(impl Build); - builder_impls!(impl RuleWithNamedFields); + builder_impls!(impl<> RuleWithNamedFields); } pub use self::build::{call, eat, empty}; @@ -370,133 +370,120 @@ pub enum Rule { Eat(Pat), Call(IStr), - Concat([Rc>; 2]), - Or(Vec>>), + Concat([IRule; 2]), + Or(Vec), - Opt(Rc>), - RepeatMany(Rc>, Option<(Rc>, SepKind)>), - RepeatMore(Rc>, Option<(Rc>, SepKind)>), + Opt(IRule), + RepeatMany(IRule, Option<(IRule, SepKind)>), + RepeatMore(IRule, Option<(IRule, SepKind)>), } -impl Rule { - pub fn field_pathset_is_refutable(&self, paths: &FieldPathset) -> bool { +impl IRule { + pub fn field_pathset_is_refutable(self, cx: &Context, paths: &FieldPathset) -> bool { if paths.0.len() > 1 { true } else { - self.field_is_refutable(paths.0.get_index(0).unwrap()) + self.field_is_refutable(cx, paths.0.get_index(0).unwrap()) } } - pub fn field_is_refutable(&self, path: &[usize]) -> bool { - match self { + + pub fn field_is_refutable(self, cx: &Context, path: &[usize]) -> bool { + match cx[self] { Rule::Empty | Rule::Eat(_) | Rule::Call(_) | Rule::RepeatMany(..) | Rule::RepeatMore(..) => false, - Rule::Concat(rules) => rules[path[0]].field_is_refutable(&path[1..]), + Rule::Concat(rules) => rules[path[0]].field_is_refutable(cx, &path[1..]), Rule::Or(..) | Rule::Opt(_) => true, } } -} -// FIXME(eddyb) this should just work with `self: &Rc` on inherent methods, -// but that still requires `#![feature(arbitrary_self_types)]`. -trait RcRuleMethods: Sized { - fn can_be_empty( - &self, - cache: &mut HashMap>, - grammar: &crate::Grammar, - ) -> MaybeKnown; -} - -impl RcRuleMethods for Rc> { - fn can_be_empty( - &self, + fn can_be_empty( + self, cache: &mut HashMap>, - grammar: &crate::Grammar, + cx: &Context, + grammar: &crate::Grammar, ) -> MaybeKnown { - match cache.entry(self.clone()) { + match cache.entry(self) { Entry::Occupied(entry) => return *entry.get(), Entry::Vacant(entry) => { entry.insert(MaybeKnown::Unknown); } }; - let r = self.can_be_empty_uncached(cache, grammar); - match r { - MaybeKnown::Known(_) => *cache.get_mut(self).unwrap() = r, - MaybeKnown::Unknown => { - cache.remove(self); - } - } - r - } -} - -impl Rule { - fn can_be_empty_uncached( - &self, - cache: &mut HashMap, MaybeKnown>, - grammar: &crate::Grammar, - ) -> MaybeKnown { - match self { + let r = match cx[self] { Rule::Empty | Rule::Opt(_) | Rule::RepeatMany(..) => MaybeKnown::Known(true), - Rule::Eat(pat) => pat.matches_empty(), - Rule::Call(rule) => grammar.rules[rule].rule.can_be_empty(cache, grammar), + Rule::Eat(ref pat) => pat.matches_empty(), + Rule::Call(rule) => grammar.rules[&rule].rule.can_be_empty(cache, cx, grammar), Rule::Concat([left, right]) => { - left.can_be_empty(cache, grammar) & right.can_be_empty(cache, grammar) + left.can_be_empty(cache, cx, grammar) & right.can_be_empty(cache, cx, grammar) } - Rule::Or(rules) => rules.iter().fold(MaybeKnown::Known(false), |prev, rule| { - prev | rule.can_be_empty(cache, grammar) + Rule::Or(ref rules) => rules.iter().fold(MaybeKnown::Known(false), |prev, rule| { + prev | rule.can_be_empty(cache, cx, grammar) }), - Rule::RepeatMore(elem, _) => elem.can_be_empty(cache, grammar), + Rule::RepeatMore(elem, _) => elem.can_be_empty(cache, cx, grammar), + }; + match r { + MaybeKnown::Known(_) => *cache.get_mut(&self).unwrap() = r, + MaybeKnown::Unknown => { + cache.remove(&self); + } } + r } - pub(crate) fn check_non_empty_opt( - &self, - cache: &mut HashMap, MaybeKnown>, - grammar: &crate::Grammar, + pub(crate) fn check_non_empty_opt( + self, + cache: &mut HashMap>, + cx: &Context, + grammar: &crate::Grammar, ) { - match self { + match cx[self] { Rule::Empty | Rule::Eat(_) | Rule::Call(_) => {} Rule::Concat([left, right]) => { - left.check_non_empty_opt(cache, grammar); - right.check_non_empty_opt(cache, grammar); + left.check_non_empty_opt(cache, cx, grammar); + right.check_non_empty_opt(cache, cx, grammar); } - Rule::Or(rules) => { + Rule::Or(ref rules) => { for rule in rules { - rule.check_non_empty_opt(cache, grammar); + rule.check_non_empty_opt(cache, cx, grammar); } } Rule::Opt(rule) => { - assert_eq!(rule.can_be_empty(cache, grammar), MaybeKnown::Known(false)); - rule.check_non_empty_opt(cache, grammar) + assert_eq!( + rule.can_be_empty(cache, cx, grammar), + MaybeKnown::Known(false) + ); + rule.check_non_empty_opt(cache, cx, grammar) } Rule::RepeatMany(elem, sep) | Rule::RepeatMore(elem, sep) => { - assert_eq!(elem.can_be_empty(cache, grammar), MaybeKnown::Known(false)); - elem.check_non_empty_opt(cache, grammar); + assert_eq!( + elem.can_be_empty(cache, cx, grammar), + MaybeKnown::Known(false) + ); + elem.check_non_empty_opt(cache, cx, grammar); if let Some((sep, _)) = sep { - sep.check_non_empty_opt(cache, grammar); + sep.check_non_empty_opt(cache, cx, grammar); } } } } - pub(crate) fn check_call_names(&self, cx: &Context, grammar: &crate::Grammar) { - match self { + pub(crate) fn check_call_names(self, cx: &Context, grammar: &crate::Grammar) { + match cx[self] { Rule::Empty | Rule::Eat(_) => {} Rule::Call(rule) => { assert!( - grammar.rules.contains_key(rule), + grammar.rules.contains_key(&rule), "no rule named `{}`", - cx[*rule] + cx[rule] ); } Rule::Concat([left, right]) => { left.check_call_names(cx, grammar); right.check_call_names(cx, grammar); } - Rule::Or(rules) => { + Rule::Or(ref rules) => { for rule in rules { rule.check_call_names(cx, grammar); } @@ -548,33 +535,33 @@ pub trait MatchesEmpty { fn matches_empty(&self) -> MaybeKnown; } -pub trait Folder: Sized { +pub trait Folder: Sized { fn cx(&mut self) -> &mut Context; - fn fold_leaf(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { + fn fold_leaf(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { rule } fn fold_concat( &mut self, - left: RuleWithNamedFields, - right: RuleWithNamedFields, - ) -> RuleWithNamedFields { + left: RuleWithNamedFields, + right: RuleWithNamedFields, + ) -> RuleWithNamedFields { (left.fold(self) + right.fold(self)).finish(self.cx()) } fn fold_or( &mut self, - mut rules: impl Iterator>, - ) -> RuleWithNamedFields { + mut rules: impl Iterator, + ) -> RuleWithNamedFields { let first = rules.next().unwrap().fold(self); rules.fold(first, |or, rule| (or | rule.fold(self)).finish(self.cx())) } - fn fold_opt(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { + fn fold_opt(&mut self, rule: RuleWithNamedFields) -> RuleWithNamedFields { rule.fold(self).opt().finish(self.cx()) } fn fold_repeat_many( &mut self, - elem: RuleWithNamedFields, - sep: Option<(RuleWithNamedFields, SepKind)>, - ) -> RuleWithNamedFields { + elem: RuleWithNamedFields, + sep: Option<(RuleWithNamedFields, SepKind)>, + ) -> RuleWithNamedFields { let elem = elem.fold(self); let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { @@ -584,9 +571,9 @@ pub trait Folder: Sized { } fn fold_repeat_more( &mut self, - elem: RuleWithNamedFields, - sep: Option<(RuleWithNamedFields, SepKind)>, - ) -> RuleWithNamedFields { + elem: RuleWithNamedFields, + sep: Option<(RuleWithNamedFields, SepKind)>, + ) -> RuleWithNamedFields { let elem = elem.fold(self); let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { @@ -596,7 +583,7 @@ pub trait Folder: Sized { } } -impl RuleWithNamedFields { +impl RuleWithNamedFields { // HACK(eddyb) this is pretty expensive, find a better way fn filter_fields<'a>( &'a self, @@ -622,52 +609,54 @@ impl RuleWithNamedFields { }) } - pub fn fold(self, folder: &mut impl Folder) -> Self { - let field_rule = |rule: &Rc>, i| RuleWithNamedFields { - rule: rule.clone(), + pub fn fold(self, folder: &mut impl Folder) -> Self { + let field_rule = |rule, i| RuleWithNamedFields { + rule, fields: self.filter_fields(Some(i)).collect(), }; - let mut rule = match &*self.rule { + let mut rule = match folder.cx()[self.rule] { Rule::Empty | Rule::Eat(_) | Rule::Call(_) => return folder.fold_leaf(self), Rule::Concat([left, right]) => { folder.fold_concat(field_rule(left, 0), field_rule(right, 1)) } - Rule::Or(rules) => folder.fold_or( - rules - .iter() - .enumerate() - .map(|(i, rule)| field_rule(rule, i)), - ), + Rule::Or(ref rules) => { + // FIXME(eddyb) this is inefficient, but we can't be iterating + // `rules` while folding, at least not without e.g. an arena. + let rules = rules.clone(); + folder.fold_or( + rules + .into_iter() + .enumerate() + .map(|(i, rule)| field_rule(rule, i)), + ) + } Rule::Opt(rule) => folder.fold_opt(field_rule(rule, 0)), Rule::RepeatMany(elem, sep) => folder.fold_repeat_many( field_rule(elem, 0), - sep.as_ref().map(|(sep, kind)| (field_rule(sep, 1), *kind)), + sep.map(|(sep, kind)| (field_rule(sep, 1), kind)), ), Rule::RepeatMore(elem, sep) => folder.fold_repeat_more( field_rule(elem, 0), - sep.as_ref().map(|(sep, kind)| (field_rule(sep, 1), *kind)), + sep.map(|(sep, kind)| (field_rule(sep, 1), kind)), ), }; rule.fields.extend(self.filter_fields(None)); rule } - pub fn insert_whitespace( + pub fn insert_whitespace( self, cx: &mut Context, - whitespace: RuleWithNamedFields, - ) -> Self - where - Pat: Clone, - { + whitespace: RuleWithNamedFields, + ) -> Self { assert!(whitespace.fields.is_empty()); struct WhitespaceInserter<'a, Pat> { cx: &'a mut Context, - whitespace: RuleWithNamedFields, + whitespace: RuleWithNamedFields, } - impl Folder for WhitespaceInserter<'_, Pat> { + impl Folder for WhitespaceInserter<'_, Pat> { fn cx(&mut self) -> &mut Context { self.cx } @@ -677,16 +666,16 @@ impl RuleWithNamedFields { // `A? WS B`, which has an incorrect leading whitespace. fn fold_concat( &mut self, - left: RuleWithNamedFields, - right: RuleWithNamedFields, - ) -> RuleWithNamedFields { + left: RuleWithNamedFields, + right: RuleWithNamedFields, + ) -> RuleWithNamedFields { (left.fold(self) + self.whitespace.clone() + right.fold(self)).finish(self.cx()) } fn fold_repeat_many( &mut self, - elem: RuleWithNamedFields, - sep: Option<(RuleWithNamedFields, SepKind)>, - ) -> RuleWithNamedFields { + elem: RuleWithNamedFields, + sep: Option<(RuleWithNamedFields, SepKind)>, + ) -> RuleWithNamedFields { let elem = elem.fold(self); let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep { @@ -714,9 +703,9 @@ impl RuleWithNamedFields { } fn fold_repeat_more( &mut self, - elem: RuleWithNamedFields, - sep: Option<(RuleWithNamedFields, SepKind)>, - ) -> RuleWithNamedFields { + elem: RuleWithNamedFields, + sep: Option<(RuleWithNamedFields, SepKind)>, + ) -> RuleWithNamedFields { let elem = elem.fold(self); let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); match sep {