From afe4d28ad9e768d7226a97dba7825cd4ea48bc2d Mon Sep 17 00:00:00 2001
From: Jonathan <vanillajonathan@users.noreply.github.com>
Date: Thu, 21 Sep 2023 00:15:39 +0200
Subject: [PATCH] feat(Lezer grammar): Fix support for ranges (#3549)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
---
 grammars/prql-lezer/src/prql.grammar | 16 ++++++++++------
 grammars/prql-lezer/test/misc.txt    |  7 +++++++
 2 files changed, 17 insertions(+), 6 deletions(-)
 create mode 100644 grammars/prql-lezer/test/misc.txt

diff --git a/grammars/prql-lezer/src/prql.grammar b/grammars/prql-lezer/src/prql.grammar
index 02448a1b278d..fda2da799e1d 100644
--- a/grammars/prql-lezer/src/prql.grammar
+++ b/grammars/prql-lezer/src/prql.grammar
@@ -38,7 +38,7 @@ Assign_call { ident_part "=" expr_call }
 Case_branch { Expr "=>" Expr }
 Array { "[" newline* Expr (("," newline* ) Expr)* ","? newline* "]" }
 // Possibly we could only accept case branches inside the Tuple?
-Case {  @specialize<ident_part, "case"> Tuple }
+Case { @specialize<ident_part, "case"> Tuple }
 
 Nested_pipeline { "(" newline* Pipeline ~ambig_newline newline? ")" }
 
@@ -53,13 +53,13 @@ Ident { ident_part ( "." (ident_part | "*"))* }
 Op_bin { op_bin_only | !term op_unary }
 
 @tokens {
-  Date { @digit+ "-" @digit+ "-" @digit+ }
-  Time { @digit+ ":" @digit+ ( ":" @digit+ ( "." @digit+ )? )? }
+  date { @digit+ "-" @digit+ "-" @digit+ }
+  time { @digit+ ":" @digit+ ( ":" @digit+ ( "." @digit+ )? )? }
   // We can't seem to set the number of digits, so this will allow any
   // combination of digits & hyphens.
   // TODO: does excluding spaces here work? It seems to make a difference, but
   // then I think the `Number` rule doesn't allow spaces...
-  DateTime { "@" ![ ] ( Date | Time | Date "T" Time ( "Z" | ( "-" | "+" ) @digit+ ":" @digit+ )? ) }
+  DateTime { "@" ![ ] ( date | time | date "T" time ( "Z" | ( "-" | "+" ) @digit+ ":" @digit+ )? ) }
   identifier_char { @asciiLetter | $[_\u{a1}-\u{10ffff}] }
   ident_part { identifier_char (identifier_char | "_" | @digit )* }
   // TODO: This is not as precise as PRQL, which doesn't allow trailing
@@ -74,7 +74,9 @@ Op_bin { op_bin_only | !term op_unary }
   line_wrap { "\\" }
   wrapped_line { newline+ (Comment newline+)* line_wrap }
   newline { "\n" }
-  Range { ".." }
+  // TODO: Because a range can also compile to BETWEEN, it should accept any literal
+  // (and arguably any expression) as a bound. If possible, it should not allow spaces.
+  Range { @digit+ ".." @digit+ }
   // Couldn't managed to do these & the interpolated as a template; couldn't
   // find how to negate a variable template
   String { $["] !["]* $["] | $['] ![']* $['] }
@@ -96,9 +98,11 @@ Op_bin { op_bin_only | !term op_unary }
   F_string {Interpolated_string<'f'>}
 
   // We need to give precedence to `Op_bin` so we don't get `x+y` as `x` & `+y`.
-  // S & F strings have precedence over idents beginning with s / f (we could
+  // R, S & F strings have precedence over idents beginning with r / s / f (we could
   // use specialize but I think means we need to redefine `String` for each)
   @precedence { R_string, S_string, F_string, op_bin_only, ident_part }
+
+  @precedence { Range, Number }
 }
 
 Def { @specialize<ident_part, "let"> ident_part "=" (Nested_pipeline (newline+ | end) | Lambda) }
diff --git a/grammars/prql-lezer/test/misc.txt b/grammars/prql-lezer/test/misc.txt
new file mode 100644
index 000000000000..d2b7423cc6d8
--- /dev/null
+++ b/grammars/prql-lezer/test/misc.txt
@@ -0,0 +1,7 @@
+# 10..20
+
+10..20
+
+==>
+
+Query(Statements(Pipeline_stmt(Pipeline(Expr(Range)))))