-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ESQL: Improve grammar to allow identifiers with . #100740
Changes from 9 commits
d500538
6d7eaa7
0e4ecb1
93cd110
5affc07
b6ba7e8
d4a0029
64f191b
9a51c58
96d1675
830b436
5865a2c
649f769
a047074
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
pr: 100740 | ||
summary: "ESQL: Improve grammar to allow identifiers with" | ||
area: ES|QL | ||
type: bug | ||
issues: | ||
- 100312 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -139,9 +139,9 @@ emp_no:integer | birth_date:date | x:date | |
; | ||
|
||
evalDateTruncGrouping | ||
from employees | eval y = date_trunc(1 year, hire_date) | stats count(emp_no) by y | sort y | keep y, count(emp_no) | limit 5; | ||
from employees | eval y = date_trunc(1 year, hire_date) | stats c = count(emp_no) by y | sort y | keep y, c | limit 5; | ||
|
||
y:date | count(emp_no):long | ||
y:date | c:long | ||
Comment on lines
-142
to
+144
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A side effect of this change - the expression has to be quoted or an alias used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have kept both variants in the query: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is a positive side effect; being able to use an unquoted function-like alias seems inconsistent |
||
1985-01-01T00:00:00.000Z | 11 | ||
1986-01-01T00:00:00.000Z | 11 | ||
1987-01-01T00:00:00.000Z | 15 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,24 @@ | ||
lexer grammar EsqlBaseLexer; | ||
|
||
DISSECT : 'dissect' -> pushMode(EXPRESSION); | ||
DROP : 'drop' -> pushMode(SOURCE_IDENTIFIERS); | ||
ENRICH : 'enrich' -> pushMode(SOURCE_IDENTIFIERS); | ||
EVAL : 'eval' -> pushMode(EXPRESSION); | ||
EXPLAIN : 'explain' -> pushMode(EXPLAIN_MODE); | ||
FROM : 'from' -> pushMode(SOURCE_IDENTIFIERS); | ||
GROK : 'grok' -> pushMode(EXPRESSION); | ||
INLINESTATS : 'inlinestats' -> pushMode(EXPRESSION); | ||
KEEP : 'keep' -> pushMode(SOURCE_IDENTIFIERS); | ||
LIMIT : 'limit' -> pushMode(EXPRESSION); | ||
MV_EXPAND : 'mv_expand' -> pushMode(SOURCE_IDENTIFIERS); | ||
PROJECT : 'project' -> pushMode(SOURCE_IDENTIFIERS); | ||
RENAME : 'rename' -> pushMode(SOURCE_IDENTIFIERS); | ||
ROW : 'row' -> pushMode(EXPRESSION); | ||
SHOW : 'show' -> pushMode(EXPRESSION); | ||
SORT : 'sort' -> pushMode(EXPRESSION); | ||
STATS : 'stats' -> pushMode(EXPRESSION); | ||
WHERE : 'where' -> pushMode(EXPRESSION); | ||
UNKNOWN_CMD : ~[ \r\n\t[\]/]+ -> pushMode(EXPRESSION); | ||
DISSECT : 'dissect' -> pushMode(EXPRESSION_MODE); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main change - commands are split to have their own mode; due to their particularities there are about 7 modes... |
||
DROP : 'drop' -> pushMode(PROJECT_MODE); | ||
ENRICH : 'enrich' -> pushMode(ENRICH_MODE); | ||
EVAL : 'eval' -> pushMode(EXPRESSION_MODE); | ||
EXPLAIN : 'explain' -> pushMode(EXPLAIN_MODE); | ||
FROM : 'from' -> pushMode(FROM_MODE); | ||
GROK : 'grok' -> pushMode(EXPRESSION_MODE); | ||
INLINESTATS : 'inlinestats' -> pushMode(EXPRESSION_MODE); | ||
KEEP : 'keep' -> pushMode(PROJECT_MODE); | ||
LIMIT : 'limit' -> pushMode(EXPRESSION_MODE); | ||
MV_EXPAND : 'mv_expand' -> pushMode(MVEXPAND_MODE); | ||
PROJECT : 'project' -> pushMode(PROJECT_MODE); | ||
RENAME : 'rename' -> pushMode(RENAME_MODE); | ||
ROW : 'row' -> pushMode(EXPRESSION_MODE); | ||
SHOW : 'show' -> pushMode(SHOW_MODE); | ||
SORT : 'sort' -> pushMode(EXPRESSION_MODE); | ||
STATS : 'stats' -> pushMode(EXPRESSION_MODE); | ||
WHERE : 'where' -> pushMode(EXPRESSION_MODE); | ||
UNKNOWN_CMD : ~[ \r\n\t[\]/]+ -> pushMode(EXPRESSION_MODE); | ||
|
||
LINE_COMMENT | ||
: '//' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) | ||
|
@@ -31,16 +31,20 @@ MULTILINE_COMMENT | |
WS | ||
: [ \r\n\t]+ -> channel(HIDDEN) | ||
; | ||
|
||
|
||
// | ||
// Explain | ||
// | ||
mode EXPLAIN_MODE; | ||
EXPLAIN_OPENING_BRACKET : '[' -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE); | ||
EXPLAIN_PIPE : '|' -> type(PIPE), popMode; | ||
EXPLAIN_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE); | ||
EXPLAIN_PIPE : PIPE -> type(PIPE), popMode; | ||
EXPLAIN_WS : WS -> channel(HIDDEN); | ||
EXPLAIN_LINE_COMMENT : LINE_COMMENT -> channel(HIDDEN); | ||
EXPLAIN_MULTILINE_COMMENT : MULTILINE_COMMENT -> channel(HIDDEN); | ||
Comment on lines
-37
to
42
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Used this approach elsewhere in trying to reuse the existing token declarations. |
||
|
||
mode EXPRESSION; | ||
// | ||
// Expression - used by most command | ||
// | ||
mode EXPRESSION_MODE; | ||
|
||
PIPE : '|' -> popMode; | ||
|
||
|
@@ -64,6 +68,27 @@ fragment EXPONENT | |
: [Ee] [+-]? DIGIT+ | ||
; | ||
|
||
fragment ASPERAND | ||
: '@' | ||
; | ||
|
||
fragment BACKQUOTE | ||
: '`' | ||
; | ||
|
||
fragment BACKQUOTE_BLOCK | ||
: ~'`' | ||
| '``' | ||
; | ||
|
||
fragment UNDERSCORE | ||
: '_' | ||
; | ||
|
||
fragment UNQUOTED_ID_BODY | ||
: (LETTER | DIGIT | UNDERSCORE) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ++ to removing the dot again from the unquoted identifier. I agree that dots will likely need to retain special meaning to navigate nested objects. If I understand correctly, this should also bring back the ability to quote just part of an identifier, e.g.
|
||
; | ||
|
||
STRING | ||
: '"' (ESCAPE_SEQUENCE | UNESCAPED_CHARS)* '"' | ||
| '"""' (~[\r\n])*? '"""' '"'? '"'? | ||
|
@@ -103,8 +128,6 @@ PARAM: '?'; | |
RLIKE: 'rlike'; | ||
RP : ')'; | ||
TRUE : 'true'; | ||
INFO : 'info'; | ||
FUNCTIONS : 'functions'; | ||
|
||
EQ : '=='; | ||
NEQ : '!='; | ||
|
@@ -124,19 +147,18 @@ PERCENT : '%'; | |
// mode. Thus, the two popModes on CLOSING_BRACKET. The other way could as | ||
// the start of a multivalued field constant. To line up with the double pop | ||
// the explain mode needs, we double push when we see that. | ||
OPENING_BRACKET : '[' -> pushMode(EXPRESSION), pushMode(EXPRESSION); | ||
OPENING_BRACKET : '[' -> pushMode(EXPRESSION_MODE), pushMode(EXPRESSION_MODE); | ||
CLOSING_BRACKET : ']' -> popMode, popMode; | ||
|
||
|
||
UNQUOTED_IDENTIFIER | ||
: LETTER (LETTER | DIGIT | '_')* | ||
: LETTER UNQUOTED_ID_BODY* | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will allow digits inside "subfields", but not on the root field. For example There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to allow unquoted identifiers that start with digits, though? If so, we'd at least have to disallow identifiers being all digits, no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
ES accepts both fields There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which we accept as long as the fields are quoted "123" - same with field above "a.1m.4321" is accepted; the problem is handling fields which are NOT quoted. |
||
// only allow @ at beginning of identifier to keep the option to allow @ as infix operator in the future | ||
// also, single `_` and `@` characters are not valid identifiers | ||
| ('_' | '@') (LETTER | DIGIT | '_')+ | ||
| (UNDERSCORE | ASPERAND) UNQUOTED_ID_BODY+ | ||
; | ||
|
||
QUOTED_IDENTIFIER | ||
: '`' ( ~'`' | '``' )* '`' | ||
: BACKQUOTE BACKQUOTE_BLOCK+ BACKQUOTE | ||
; | ||
|
||
EXPR_LINE_COMMENT | ||
|
@@ -150,42 +172,204 @@ EXPR_MULTILINE_COMMENT | |
EXPR_WS | ||
: WS -> channel(HIDDEN) | ||
; | ||
// | ||
// FROM command | ||
// | ||
mode FROM_MODE; | ||
FROM_PIPE : PIPE -> type(PIPE), popMode; | ||
FROM_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET), pushMode(FROM_MODE), pushMode(FROM_MODE); | ||
FROM_CLOSING_BRACKET : CLOSING_BRACKET -> type(CLOSING_BRACKET), popMode, popMode; | ||
FROM_COMMA : COMMA -> type(COMMA); | ||
FROM_ASSIGN : ASSIGN -> type(ASSIGN); | ||
Comment on lines
+178
to
+183
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Type reuse and consistency. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to push/pop There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's because of the double push the opening_bracket is doing: https://github.com/elastic/elasticsearch/pull/100740/files#diff-e78771d02134981fe1563249a0fbbdabf95b40299b2086dd19fccef6e87f39c4R150 |
||
|
||
METADATA: 'metadata'; | ||
|
||
fragment FROM_UNQUOTED_IDENTIFIER_PART | ||
: ~[=`|,[\]/ \t\r\n] | ||
| '/' ~[*/] // allow single / but not followed by another / or * which would start a comment | ||
; | ||
|
||
mode SOURCE_IDENTIFIERS; | ||
FROM_UNQUOTED_IDENTIFIER | ||
: FROM_UNQUOTED_IDENTIFIER_PART+ | ||
; | ||
|
||
FROM_QUOTED_IDENTIFIER | ||
: QUOTED_IDENTIFIER -> type(QUOTED_IDENTIFIER) | ||
; | ||
|
||
FROM_LINE_COMMENT | ||
: LINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
FROM_MULTILINE_COMMENT | ||
: MULTILINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
FROM_WS | ||
: WS -> channel(HIDDEN) | ||
; | ||
// | ||
// DROP, KEEP, PROJECT | ||
// | ||
mode PROJECT_MODE; | ||
PROJECT_PIPE : PIPE -> type(PIPE), popMode; | ||
PROJECT_DOT: DOT -> type(DOT); | ||
PROJECT_COMMA : COMMA -> type(COMMA); | ||
|
||
fragment UNQUOTED_ID_BODY_WITH_PATTERN | ||
: (LETTER | DIGIT | UNDERSCORE | ASTERISK) | ||
; | ||
|
||
PROJECT_UNQUOTED_IDENTIFIER | ||
: (LETTER | ASTERISK) UNQUOTED_ID_BODY_WITH_PATTERN* | ||
| (UNDERSCORE | ASPERAND) UNQUOTED_ID_BODY_WITH_PATTERN+ | ||
; | ||
Comment on lines
+223
to
+226
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Allow * in unused field declaration in keep, drop and project... |
||
|
||
PROJECT_QUOTED_IDENTIFIER | ||
: QUOTED_IDENTIFIER -> type(QUOTED_IDENTIFIER) | ||
; | ||
|
||
PROJECT_LINE_COMMENT | ||
: LINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
PROJECT_MULTILINE_COMMENT | ||
: MULTILINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
PROJECT_WS | ||
: WS -> channel(HIDDEN) | ||
; | ||
// | ||
// | RENAME a.b AS x, c AS y | ||
// | ||
mode RENAME_MODE; | ||
RENAME_PIPE : PIPE -> type(PIPE), popMode; | ||
RENAME_ASSIGN : ASSIGN -> type(ASSIGN); | ||
RENAME_COMMA : COMMA -> type(COMMA); | ||
RENAME_DOT: DOT -> type(DOT); | ||
|
||
SRC_PIPE : '|' -> type(PIPE), popMode; | ||
SRC_OPENING_BRACKET : '[' -> type(OPENING_BRACKET), pushMode(SOURCE_IDENTIFIERS), pushMode(SOURCE_IDENTIFIERS); | ||
SRC_CLOSING_BRACKET : ']' -> popMode, popMode, type(CLOSING_BRACKET); | ||
SRC_COMMA : ',' -> type(COMMA); | ||
SRC_ASSIGN : '=' -> type(ASSIGN); | ||
AS : 'as'; | ||
METADATA: 'metadata'; | ||
ON : 'on'; | ||
WITH : 'with'; | ||
|
||
SRC_UNQUOTED_IDENTIFIER | ||
: SRC_UNQUOTED_IDENTIFIER_PART+ | ||
RENAME_QUOTED_IDENTIFIER | ||
: QUOTED_IDENTIFIER -> type(QUOTED_IDENTIFIER) | ||
; | ||
|
||
fragment SRC_UNQUOTED_IDENTIFIER_PART | ||
: ~[=`|,[\]/ \t\r\n]+ | ||
| '/' ~[*/] // allow single / but not followed by another / or * which would start a comment | ||
// use the unquoted pattern to let the parser invalidate fields with * | ||
RENAME_UNQUOTED_IDENTIFIER | ||
: PROJECT_UNQUOTED_IDENTIFIER -> type(PROJECT_UNQUOTED_IDENTIFIER) | ||
; | ||
|
||
RENAME_LINE_COMMENT | ||
: LINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
SRC_QUOTED_IDENTIFIER | ||
: QUOTED_IDENTIFIER | ||
RENAME_MULTILINE_COMMENT | ||
: MULTILINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
SRC_LINE_COMMENT | ||
RENAME_WS | ||
: WS -> channel(HIDDEN) | ||
; | ||
|
||
// | ENRICH ON key WITH fields | ||
mode ENRICH_MODE; | ||
ENRICH_PIPE : PIPE -> type(PIPE), popMode; | ||
|
||
ON : 'on' -> pushMode(ENRICH_FIELD_MODE); | ||
WITH : 'with' -> pushMode(ENRICH_FIELD_MODE); | ||
|
||
// use the unquoted pattern to let the parser invalidate fields with * | ||
ENRICH_POLICY_UNQUOTED_IDENTIFIER | ||
: FROM_UNQUOTED_IDENTIFIER -> type(FROM_UNQUOTED_IDENTIFIER) | ||
; | ||
|
||
ENRICH_QUOTED_IDENTIFIER | ||
: QUOTED_IDENTIFIER -> type(QUOTED_IDENTIFIER) | ||
; | ||
|
||
ENRICH_LINE_COMMENT | ||
: LINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
ENRICH_MULTILINE_COMMENT | ||
: MULTILINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
ENRICH_WS | ||
: WS -> channel(HIDDEN) | ||
; | ||
|
||
// submode for Enrich to allow different lexing between policy identifier (loose) and field identifiers | ||
mode ENRICH_FIELD_MODE; | ||
ENRICH_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode; | ||
ENRICH_FIELD_ASSIGN : ASSIGN -> type(ASSIGN); | ||
ENRICH_FIELD_COMMA : COMMA -> type(COMMA); | ||
ENRICH_FIELD_DOT: DOT -> type(DOT); | ||
|
||
ENRICH_FIELD_WITH : WITH -> type(WITH) ; | ||
|
||
ENRICH_FIELD_UNQUOTED_IDENTIFIER | ||
: PROJECT_UNQUOTED_IDENTIFIER -> type(PROJECT_UNQUOTED_IDENTIFIER) | ||
; | ||
|
||
ENRICH_FIELD_QUOTED_IDENTIFIER | ||
: QUOTED_IDENTIFIER -> type(QUOTED_IDENTIFIER) | ||
; | ||
|
||
ENRICH_FIELD_LINE_COMMENT | ||
: LINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
ENRICH_FIELD_MULTILINE_COMMENT | ||
: MULTILINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
ENRICH_FIELD_WS | ||
: WS -> channel(HIDDEN) | ||
; | ||
|
||
mode MVEXPAND_MODE; | ||
MVEXPAND_PIPE : PIPE -> type(PIPE), popMode; | ||
MVEXPAND_DOT: DOT -> type(DOT); | ||
|
||
MVEXPAND_QUOTED_IDENTIFIER | ||
: QUOTED_IDENTIFIER -> type(QUOTED_IDENTIFIER) | ||
; | ||
|
||
MVEXPAND_UNQUOTED_IDENTIFIER | ||
: UNQUOTED_IDENTIFIER -> type(UNQUOTED_IDENTIFIER) | ||
; | ||
|
||
MVEXPAND_LINE_COMMENT | ||
: LINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
MVEXPAND_MULTILINE_COMMENT | ||
: MULTILINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
MVEXPAND_WS | ||
: WS -> channel(HIDDEN) | ||
; | ||
|
||
// | ||
// SHOW INFO | ||
// | ||
mode SHOW_MODE; | ||
SHOW_PIPE : PIPE -> type(PIPE), popMode; | ||
|
||
INFO : 'info'; | ||
FUNCTIONS : 'functions'; | ||
|
||
SHOW_LINE_COMMENT | ||
: LINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
SRC_MULTILINE_COMMENT | ||
SHOW_MULTILINE_COMMENT | ||
: MULTILINE_COMMENT -> channel(HIDDEN) | ||
; | ||
|
||
SRC_WS | ||
SHOW_WS | ||
: WS -> channel(HIDDEN) | ||
; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changelog didn't like the period.