From afe4d28ad9e768d7226a97dba7825cd4ea48bc2d Mon Sep 17 00:00:00 2001
From: Jonathan
Date: Thu, 21 Sep 2023 00:15:39 +0200
Subject: [PATCH] feat(Lezer grammar): Fix support for ranges (#3549)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
---
NOTE(review): this copy of the patch appears to have lost some text (possibly
through markup stripping) -- confirm against the upstream commit before
applying:
  * the `-`/`+` lines of the `Case` hunk read identically here;
  * `@specialize` is shown without `<...>` arguments on the `Case` lines and
    on the `Def` context line;
  * the `From:` header carries no address.
Line breaks and indentation below were reconstructed from the hunk line
counts; every token is unchanged.

 grammars/prql-lezer/src/prql.grammar | 16 ++++++++++------
 grammars/prql-lezer/test/misc.txt    |  7 +++++++
 2 files changed, 17 insertions(+), 6 deletions(-)
 create mode 100644 grammars/prql-lezer/test/misc.txt

diff --git a/grammars/prql-lezer/src/prql.grammar b/grammars/prql-lezer/src/prql.grammar
index 02448a1b278d..fda2da799e1d 100644
--- a/grammars/prql-lezer/src/prql.grammar
+++ b/grammars/prql-lezer/src/prql.grammar
@@ -38,7 +38,7 @@ Assign_call { ident_part "=" expr_call }
 Case_branch { Expr "=>" Expr }
 Array { "[" newline* Expr (("," newline* ) Expr)* ","? newline* "]" }
 // Possibly we could only accept case branches inside the Tuple?
-Case { @specialize Tuple }
+Case { @specialize Tuple }
 
 Nested_pipeline { "(" newline* Pipeline ~ambig_newline newline? ")" }
 
@@ -53,13 +53,13 @@ Ident { ident_part ( "." (ident_part | "*"))* }
 Op_bin { op_bin_only | !term op_unary }
 
 @tokens {
-  Date { @digit+ "-" @digit+ "-" @digit+ }
-  Time { @digit+ ":" @digit+ ( ":" @digit+ ( "." @digit+ )? )? }
+  date { @digit+ "-" @digit+ "-" @digit+ }
+  time { @digit+ ":" @digit+ ( ":" @digit+ ( "." @digit+ )? )? }
   // We can't seem to set the number of digits, so this will allow any
   // combination of digits & hyphens.
   // TODO: does excluding spaces here work? It seems to make a difference, but
   // then I think the `Number` rule doesn't allow spaces...
-  DateTime { "@" ![ ] ( Date | Time | Date "T" Time ( "Z" | ( "-" | "+" ) @digit+ ":" @digit+ )? ) }
+  DateTime { "@" ![ ] ( date | time | date "T" time ( "Z" | ( "-" | "+" ) @digit+ ":" @digit+ )? ) }
   identifier_char { @asciiLetter | $[_\u{a1}-\u{10ffff}] }
   ident_part { identifier_char (identifier_char | "_" | @digit )* }
   // TODO: This is not as precise as PRQL, which doesn't allow trailing
@@ -74,7 +74,9 @@ Op_bin { op_bin_only | !term op_unary }
   line_wrap { "\\" }
   wrapped_line { newline+ (Comment newline+)* line_wrap }
   newline { "\n" }
-  Range { ".." }
+  // TODO: Because this can also be used to compile to BETWEEN, ranges should
+  // allow any literal, and arguably any expression. And if possible it shouldn't allow for spaces.
+  Range { @digit+ ".." @digit+ }
   // Couldn't managed to do these & the interpolated as a template; couldn't
   // find how to negate a variable template
   String { $["] !["]* $["] | $['] ![']* $['] }
@@ -96,9 +98,11 @@ Op_bin { op_bin_only | !term op_unary }
   F_string {Interpolated_string<'f'>}
 
   // We need to give precedence to `Op_bin` so we don't get `x+y` as `x` & `+y`.
-  // S & F strings have precedence over idents beginning with s / f (we could
+  // R, S & F strings have precedence over idents beginning with r / s / f (we could
   // use specialize but I think means we need to redefine `String` for each)
   @precedence { R_string, S_string, F_string, op_bin_only, ident_part }
+
+  @precedence { Range, Number }
 }
 
 Def { @specialize ident_part "=" (Nested_pipeline (newline+ | end) | Lambda) }
diff --git a/grammars/prql-lezer/test/misc.txt b/grammars/prql-lezer/test/misc.txt
new file mode 100644
index 000000000000..d2b7423cc6d8
--- /dev/null
+++ b/grammars/prql-lezer/test/misc.txt
@@ -0,0 +1,7 @@
+# 10..20
+
+10..20
+
+==>
+
+Query(Statements(Pipeline_stmt(Pipeline(Expr(Range)))))