From 04347d8d25c8fe074560561b21170d138c773fd1 Mon Sep 17 00:00:00 2001
From: Jason Lee <huacnlee@gmail.com>
Date: Wed, 4 Jan 2023 21:25:27 +0800
Subject: [PATCH] Add support for `use filename.pest` in Pest grammars

---
 generator/src/lib.rs             | 115 ++++++++++++++++++++++++++-----
 grammars/src/grammars/_base.pest |   1 +
 grammars/src/grammars/json.pest  |   3 +-
 grammars/src/grammars/toml.pest  |   2 +-
 meta/src/grammar.pest            |   6 +-
 meta/src/parser.rs               |  28 +++++++-
 6 files changed, 133 insertions(+), 22 deletions(-)
 create mode 100644 grammars/src/grammars/_base.pest

diff --git a/generator/src/lib.rs b/generator/src/lib.rs
index ac595d3c..5bcd303f 100644
--- a/generator/src/lib.rs
+++ b/generator/src/lib.rs
@@ -24,7 +24,7 @@ extern crate quote;
 use std::env;
 use std::fs::File;
 use std::io::{self, Read};
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
 use proc_macro2::TokenStream;
 use syn::{Attribute, DeriveInput, Generics, Ident, Lit, Meta};
@@ -36,6 +36,42 @@ mod generator;
 use pest_meta::parser::{self, rename_meta_rule, Rule};
 use pest_meta::{optimizer, unwrap_or_report, validator};
 
+fn join_path(path: &str) -> PathBuf {
+    let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into());
+
+    // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR
+    // first.
+    //
+    // If we cannot find the expected file over there, fallback to the
+    // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience
+    // reasons.
+    // TODO: This could be refactored once `std::path::absolute()` gets stabilized.
+    // https://doc.rust-lang.org/std/path/fn.absolute.html
+    let path = if Path::new(&root).join(path).exists() {
+        Path::new(&root).join(path)
+    } else {
+        Path::new(&root).join("src/").join(path)
+    };
+
+    path
+}
+
+/// Resolve `filename` relative to the parent directory of `path`, or to `join_path("./")` if `path` is `None`.
+fn partial_path(path: Option<&PathBuf>, filename: &str) -> PathBuf {
+    let root = match path {
+        Some(path) => path.parent().unwrap().to_path_buf(),
+        None => join_path("./"),
+    };
+
+    // Append the `.pest` suffix if it is missing
+    let mut filename = filename.to_string();
+    if !filename.to_lowercase().ends_with(".pest") {
+        filename.push_str(".pest");
+    }
+
+    root.join(filename)
+}
+
 /// Processes the derive/proc macro input and generates the corresponding parser based
 /// on the parsed grammar. If `include_grammar` is set to true, it'll generate an explicit
 /// "include_str" statement (done in pest_derive, but turned off in the local bootstrap).
@@ -44,26 +80,13 @@ pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream {
     let (name, generics, contents) = parse_derive(ast);
 
     let mut data = String::new();
+    let mut has_use = false;
     let mut path = None;
 
     for content in contents {
         let (_data, _path) = match content {
             GrammarSource::File(ref path) => {
-                let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into());
-
-                // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR
-                // first.
-                //
-                // If we cannot find the expected file over there, fallback to the
-                // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience
-                // reasons.
-                // TODO: This could be refactored once `std::path::absolute()` get's stabilized.
-                // https://doc.rust-lang.org/std/path/fn.absolute.html
-                let path = if Path::new(&root).join(path).exists() {
-                    Path::new(&root).join(path)
-                } else {
-                    Path::new(&root).join("src/").join(path)
-                };
+                let path = join_path(path);
 
                 let file_name = match path.file_name() {
                     Some(file_name) => file_name,
@@ -85,13 +108,44 @@ pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream {
         }
     }
 
-    let pairs = match parser::parse(Rule::grammar_rules, &data) {
+    // Parse the grammar as written first, so that any `use` statements can be located
+    let raw_data = data.clone();
+    let mut pairs = match parser::parse(Rule::grammar_rules, &raw_data) {
         Ok(pairs) => pairs,
         Err(error) => panic!("error parsing \n{}", error.renamed_rules(rename_meta_rule)),
     };
 
+    // Find each `use filename.pest` statement and replace it with the referenced file's contents
+    let mut partial_pairs = pairs.clone().flatten().peekable();
+    while let Some(pair) = partial_pairs.next() {
+        if pair.as_rule() == Rule::_use {
+            if let Some(filename) = partial_pairs.peek() {
+                let partial_data = match read_file(partial_path(path.as_ref(), filename.as_str())) {
+                    Ok(data) => data,
+                    Err(error) => panic!("error opening {:?}: {}", filename, error),
+                };
+
+                let (start, end) = (pair.as_span().start(), pair.as_span().end());
+
+                data.replace_range(start..end, &partial_data);
+                has_use = true;
+            } else {
+                panic!("use must next with filename")
+            }
+        }
+    }
+
+    if has_use {
+        // Re-parse the data after replacing the `use` statement
+        pairs = match parser::parse(Rule::grammar_rules, &data) {
+            Ok(pairs) => pairs,
+            Err(error) => panic!("error parsing \n{}", error.renamed_rules(rename_meta_rule)),
+        };
+    }
+
     let defaults = unwrap_or_report(validator::validate_pairs(pairs.clone()));
     let ast = unwrap_or_report(parser::consume_rules(pairs));
+
     let optimized = optimizer::optimize(ast);
 
     generator::generate(name, &generics, path, optimized, defaults, include_grammar)
@@ -155,6 +209,10 @@ fn get_attribute(attr: &Attribute) -> GrammarSource {
 
 #[cfg(test)]
 mod tests {
+    use std::path::PathBuf;
+
+    use crate::partial_path;
+
     use super::parse_derive;
     use super::GrammarSource;
 
@@ -225,4 +283,27 @@ mod tests {
         let ast = syn::parse_str(definition).unwrap();
         parse_derive(ast);
     }
+
+    #[test]
+    fn test_partial_path() {
+        assert_eq!(
+            "tests/grammars/base.pest",
+            partial_path(Some(&PathBuf::from("tests/grammars/foo.pest")), "base")
+                .to_str()
+                .unwrap()
+        );
+
+        assert_eq!(
+            "tests/grammars/base.pest",
+            partial_path(Some(&PathBuf::from("tests/grammars/foo.pest")), "base.pest")
+                .to_str()
+                .unwrap()
+        );
+
+        let root = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into());
+        assert_eq!(
+            std::path::Path::new(&root).join("base.pest"),
+            partial_path(None, "base.pest")
+        );
+    }
 }
diff --git a/grammars/src/grammars/_base.pest b/grammars/src/grammars/_base.pest
new file mode 100644
index 00000000..941f3249
--- /dev/null
+++ b/grammars/src/grammars/_base.pest
@@ -0,0 +1 @@
+WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
\ No newline at end of file
diff --git a/grammars/src/grammars/json.pest b/grammars/src/grammars/json.pest
index f8b423a5..ffd982ea 100644
--- a/grammars/src/grammars/json.pest
+++ b/grammars/src/grammars/json.pest
@@ -6,6 +6,7 @@
 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
+use _base.pest
 
 json = { SOI ~ (object | array) ~ EOI }
 
@@ -28,5 +29,3 @@ exp    = @{ ("E" | "e") ~ ("+" | "-")? ~ ASCII_DIGIT+ }
 bool = { "true" | "false" }
 
 null = { "null" }
-
-WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
diff --git a/grammars/src/grammars/toml.pest b/grammars/src/grammars/toml.pest
index b929cef6..c3aa4828 100644
--- a/grammars/src/grammars/toml.pest
+++ b/grammars/src/grammars/toml.pest
@@ -6,6 +6,7 @@
 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
+use ./_base
 
 toml = { SOI ~ (table | array_table | pair)* ~ EOI }
 
@@ -70,5 +71,4 @@ exp     = @{ ("E" | "e") ~ ("+" | "-")? ~ int }
 
 boolean = { "true" | "false" }
 
-WHITESPACE = _{ " " | "\t" | NEWLINE }
 COMMENT    = _{ "#" ~ (!NEWLINE ~ ANY)* }
diff --git a/meta/src/grammar.pest b/meta/src/grammar.pest
index 282ca35b..8873aa9f 100644
--- a/meta/src/grammar.pest
+++ b/meta/src/grammar.pest
@@ -11,7 +11,8 @@ grammar_rules = _{ SOI ~ grammar_rule+ ~ EOI }
 
 grammar_rule = {
     identifier ~ assignment_operator ~ modifier? ~
-    opening_brace ~ expression ~ closing_brace
+    opening_brace ~ expression ~ closing_brace |
+    _use
 }
 
 assignment_operator = { "=" }
@@ -96,3 +97,6 @@ newline    = _{ "\n" | "\r\n" }
 WHITESPACE = _{ " " | "\t" | newline }
 block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" }
 COMMENT    = _{ block_comment | ("//" ~ (!newline ~ ANY)*) }
+
+_use = ${ "use" ~ " "+ ~ path }
+path = @{ (!newline ~ ANY)* ~ ".pest"? }
\ No newline at end of file
diff --git a/meta/src/parser.rs b/meta/src/parser.rs
index fc0224b3..925315fb 100644
--- a/meta/src/parser.rs
+++ b/meta/src/parser.rs
@@ -243,6 +243,7 @@ pub fn rename_meta_rule(rule: &Rule) -> String {
         Rule::insensitive_string => "`^`".to_owned(),
         Rule::range_operator => "`..`".to_owned(),
         Rule::single_quote => "`'`".to_owned(),
+        Rule::_use => "use".to_owned(),
         other_rule => format!("{:?}", other_rule),
     }
 }
@@ -1093,13 +1094,38 @@ mod tests {
         };
     }
 
+    #[test]
+    fn test_use() {
+        parses_to! {
+            parser: PestParser,
+            input: "use foo",
+            rule: Rule::_use,
+            tokens: [
+                _use(0, 7, [
+                    path(4, 7),
+                ])
+            ]
+        };
+
+        parses_to! {
+            parser: PestParser,
+            input: "use  foo.bar.pest",
+            rule: Rule::_use,
+            tokens: [
+                _use(0, 17, [
+                    path(5, 17),
+                ])
+            ]
+        };
+    }
+
     #[test]
     fn wrong_identifier() {
         fails_with! {
             parser: PestParser,
             input: "0",
             rule: Rule::grammar_rules,
-            positives: vec![Rule::identifier],
+            positives: vec![Rule::grammar_rule],
             negatives: vec![],
             pos: 0
         };