From 04347d8d25c8fe074560561b21170d138c773fd1 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Wed, 4 Jan 2023 21:25:27 +0800 Subject: [PATCH] Add to support `use filename.pest` for Pest grammars --- generator/src/lib.rs | 115 ++++++++++++++++++++++++++----- grammars/src/grammars/_base.pest | 1 + grammars/src/grammars/json.pest | 3 +- grammars/src/grammars/toml.pest | 2 +- meta/src/grammar.pest | 6 +- meta/src/parser.rs | 28 +++++++- 6 files changed, 133 insertions(+), 22 deletions(-) create mode 100644 grammars/src/grammars/_base.pest diff --git a/generator/src/lib.rs b/generator/src/lib.rs index ac595d3c..5bcd303f 100644 --- a/generator/src/lib.rs +++ b/generator/src/lib.rs @@ -24,7 +24,7 @@ extern crate quote; use std::env; use std::fs::File; use std::io::{self, Read}; -use std::path::Path; +use std::path::{Path, PathBuf}; use proc_macro2::TokenStream; use syn::{Attribute, DeriveInput, Generics, Ident, Lit, Meta}; @@ -36,6 +36,42 @@ mod generator; use pest_meta::parser::{self, rename_meta_rule, Rule}; use pest_meta::{optimizer, unwrap_or_report, validator}; +fn join_path(path: &str) -> PathBuf { + let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); + + // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR + // first. + // + // If we cannot find the expected file over there, fall back to the + // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience + // reasons. + // TODO: This could be refactored once `std::path::absolute()` gets stabilized. 
+ // https://doc.rust-lang.org/std/path/fn.absolute.html + let path = if Path::new(&root).join(path).exists() { + Path::new(&root).join(path) + } else { + Path::new(&root).join("src/").join(path) + }; + + path +} + +/// Get path relative to `path` dir, or relative to root path +fn partial_path(path: Option<&PathBuf>, filename: &str) -> PathBuf { + let root = match path { + Some(path) => path.parent().unwrap().to_path_buf(), + None => join_path("./"), + }; + + // Add .pest suffix if not exist + let mut filename = filename.to_string(); + if !filename.to_lowercase().ends_with(".pest") { + filename.push_str(".pest"); + } + + root.join(filename) +} + /// Processes the derive/proc macro input and generates the corresponding parser based /// on the parsed grammar. If `include_grammar` is set to true, it'll generate an explicit /// "include_str" statement (done in pest_derive, but turned off in the local bootstrap). @@ -44,26 +80,13 @@ pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { let (name, generics, contents) = parse_derive(ast); let mut data = String::new(); + let mut has_use = false; let mut path = None; for content in contents { let (_data, _path) = match content { GrammarSource::File(ref path) => { - let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); - - // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR - // first. - // - // If we cannot find the expected file over there, fallback to the - // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience - // reasons. - // TODO: This could be refactored once `std::path::absolute()` get's stabilized. 
- // https://doc.rust-lang.org/std/path/fn.absolute.html - let path = if Path::new(&root).join(path).exists() { - Path::new(&root).join(path) - } else { - Path::new(&root).join("src/").join(path) - }; + let path = join_path(path); let file_name = match path.file_name() { Some(file_name) => file_name, @@ -85,13 +108,44 @@ pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { } } - let pairs = match parser::parse(Rule::grammar_rules, &data) { + // parse `use filename.pest` and replace data + let raw_data = data.clone(); + let mut pairs = match parser::parse(Rule::grammar_rules, &raw_data) { Ok(pairs) => pairs, Err(error) => panic!("error parsing \n{}", error.renamed_rules(rename_meta_rule)), }; + // parse `use filename.pest` and replace data + let mut partial_pairs = pairs.clone().flatten().peekable(); + while let Some(pair) = partial_pairs.next() { + if pair.as_rule() == Rule::_use { + if let Some(filename) = partial_pairs.peek() { + let partial_data = match read_file(partial_path(path.as_ref(), filename.as_str())) { + Ok(data) => data, + Err(error) => panic!("error opening {:?}: {}", filename, error), + }; + + let (start, end) = (pair.as_span().start(), pair.as_span().end()); + + data.replace_range(start..end, &partial_data); + has_use = true; + } else { + panic!("use must next with filename") + } + } + } + + if has_use { + // Re-parse the data after replacing the `use` statement + pairs = match parser::parse(Rule::grammar_rules, &data) { + Ok(pairs) => pairs, + Err(error) => panic!("error parsing \n{}", error.renamed_rules(rename_meta_rule)), + }; + } + let defaults = unwrap_or_report(validator::validate_pairs(pairs.clone())); let ast = unwrap_or_report(parser::consume_rules(pairs)); + let optimized = optimizer::optimize(ast); generator::generate(name, &generics, path, optimized, defaults, include_grammar) @@ -155,6 +209,10 @@ fn get_attribute(attr: &Attribute) -> GrammarSource { #[cfg(test)] mod tests { + use std::path::PathBuf; + + use 
crate::partial_path; + use super::parse_derive; use super::GrammarSource; @@ -225,4 +283,27 @@ mod tests { let ast = syn::parse_str(definition).unwrap(); parse_derive(ast); } + + #[test] + fn test_partial_path() { + assert_eq!( + "tests/grammars/base.pest", + partial_path(Some(&PathBuf::from("tests/grammars/foo.pest")), "base") + .to_str() + .unwrap() + ); + + assert_eq!( + "tests/grammars/base.pest", + partial_path(Some(&PathBuf::from("tests/grammars/foo.pest")), "base.pest") + .to_str() + .unwrap() + ); + + let root = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); + assert_eq!( + std::path::Path::new(&root).join("base.pest"), + partial_path(None, "base.pest") + ); + } } diff --git a/grammars/src/grammars/_base.pest b/grammars/src/grammars/_base.pest new file mode 100644 index 00000000..941f3249 --- /dev/null +++ b/grammars/src/grammars/_base.pest @@ -0,0 +1 @@ +WHITESPACE = _{ " " | "\t" | "\r" | "\n" } \ No newline at end of file diff --git a/grammars/src/grammars/json.pest b/grammars/src/grammars/json.pest index f8b423a5..ffd982ea 100644 --- a/grammars/src/grammars/json.pest +++ b/grammars/src/grammars/json.pest @@ -6,6 +6,7 @@ // license , at your // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +use _base.pest json = { SOI ~ (object | array) ~ EOI } @@ -28,5 +29,3 @@ exp = @{ ("E" | "e") ~ ("+" | "-")? ~ ASCII_DIGIT+ } bool = { "true" | "false" } null = { "null" } - -WHITESPACE = _{ " " | "\t" | "\r" | "\n" } diff --git a/grammars/src/grammars/toml.pest b/grammars/src/grammars/toml.pest index b929cef6..c3aa4828 100644 --- a/grammars/src/grammars/toml.pest +++ b/grammars/src/grammars/toml.pest @@ -6,6 +6,7 @@ // license , at your // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. 
+use ./_base toml = { SOI ~ (table | array_table | pair)* ~ EOI } @@ -70,5 +71,4 @@ exp = @{ ("E" | "e") ~ ("+" | "-")? ~ int } boolean = { "true" | "false" } -WHITESPACE = _{ " " | "\t" | NEWLINE } COMMENT = _{ "#" ~ (!NEWLINE ~ ANY)* } diff --git a/meta/src/grammar.pest b/meta/src/grammar.pest index 282ca35b..8873aa9f 100644 --- a/meta/src/grammar.pest +++ b/meta/src/grammar.pest @@ -11,7 +11,8 @@ grammar_rules = _{ SOI ~ grammar_rule+ ~ EOI } grammar_rule = { identifier ~ assignment_operator ~ modifier? ~ - opening_brace ~ expression ~ closing_brace + opening_brace ~ expression ~ closing_brace | + _use } assignment_operator = { "=" } @@ -96,3 +97,6 @@ newline = _{ "\n" | "\r\n" } WHITESPACE = _{ " " | "\t" | newline } block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" } COMMENT = _{ block_comment | ("//" ~ (!newline ~ ANY)*) } + +_use = ${ "use" ~ " "+ ~ path } +path = @{ (!newline ~ ANY)* ~ ".pest"? } \ No newline at end of file diff --git a/meta/src/parser.rs b/meta/src/parser.rs index fc0224b3..925315fb 100644 --- a/meta/src/parser.rs +++ b/meta/src/parser.rs @@ -243,6 +243,7 @@ pub fn rename_meta_rule(rule: &Rule) -> String { Rule::insensitive_string => "`^`".to_owned(), Rule::range_operator => "`..`".to_owned(), Rule::single_quote => "`'`".to_owned(), + Rule::_use => "use".to_owned(), other_rule => format!("{:?}", other_rule), } } @@ -1093,13 +1094,38 @@ mod tests { }; } + #[test] + fn test_use() { + parses_to! { + parser: PestParser, + input: "use foo", + rule: Rule::_use, + tokens: [ + _use(0, 7, [ + path(4, 7), + ]) + ] + }; + + parses_to! { + parser: PestParser, + input: "use foo.bar.pest", + rule: Rule::_use, + tokens: [ + _use(0, 17, [ + path(5, 17), + ]) + ] + }; + } + #[test] fn wrong_identifier() { fails_with! { parser: PestParser, input: "0", rule: Rule::grammar_rules, - positives: vec![Rule::identifier], + positives: vec![Rule::grammar_rule], negatives: vec![], pos: 0 };