Skip to content

Commit

Permalink
EQL grammar updates and tests (#49658)
Browse files Browse the repository at this point in the history
* EQL: Additional tests and grammar updates
* EQL: Add backtick escaped identifiers
* EQL: Adding keywords to language
* EQL: Add checks for unsupported syntax
* EQL: Testing updates and PR feedback
* EQL: Add string escapes
* EQL: Cleanup grammar for identifier
* EQL: Remove tabs from .eql tests
  • Loading branch information
rw-access authored Jan 27, 2020
1 parent 0483f7c commit 6f1890b
Show file tree
Hide file tree
Showing 16 changed files with 2,280 additions and 1,426 deletions.
150 changes: 72 additions & 78 deletions x-pack/plugin/eql/src/main/antlr/EqlBase.g4
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

grammar EqlBase;

tokens {
DELIMITER
}

singleStatement
: statement EOF
Expand All @@ -19,45 +16,54 @@ singleExpression
;

statement
: query (PIPE pipe)*
: query pipe*
;

query
: sequence
| join
| condition
| eventQuery
;

sequenceParams
: WITH (MAXSPAN EQ timeUnit)
;

sequence
: SEQUENCE (by=joinKeys)? (span)?
match+
(UNTIL match)?
: SEQUENCE (by=joinKeys sequenceParams? | sequenceParams by=joinKeys?)?
sequenceTerm sequenceTerm+
(UNTIL sequenceTerm)?
;

join
: JOIN (by=joinKeys)?
match+
(UNTIL match)?
joinTerm joinTerm+
(UNTIL joinTerm)?
;

pipe
: kind=IDENTIFIER (booleanExpression (COMMA booleanExpression)*)?
: PIPE kind=IDENTIFIER (booleanExpression (COMMA booleanExpression)*)?
;


joinKeys
: BY qualifiedNames
;

span
: WITH MAXSPAN EQ DIGIT_IDENTIFIER
: BY expression (COMMA expression)*
;

match
: LB condition RB (by=joinKeys)?
joinTerm
: subquery (by=joinKeys)?
;

sequenceTerm
: subquery (FORK (EQ booleanValue)?)? (by=joinKeys)?
;

subquery
: LB eventQuery RB
;

condition
: event=qualifiedName WHERE expression
eventQuery
: event=identifier WHERE expression
;

expression
Expand All @@ -66,6 +72,7 @@ expression

booleanExpression
: NOT booleanExpression #logicalNot
| relationship=IDENTIFIER OF subquery #processCheck
| predicated #booleanDefault
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
Expand All @@ -81,9 +88,7 @@ predicated
// Dedicated calls for each branch are intentionally avoided so that the NOT handling can be shared across them;
// the `kind` property is used to differentiate the branches instead.
predicate
: NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
| NOT? kind=IN LP valueExpression (COMMA valueExpression)* RP
| NOT? kind=IN LP query RP
: NOT? kind=IN LP valueExpression (COMMA valueExpression)* RP
;

valueExpression
Expand All @@ -102,14 +107,14 @@ primaryExpression
;

functionExpression
: identifier LP (expression (COMMA expression)*)? RP
: name=IDENTIFIER LP (expression (COMMA expression)*)? RP
;

constant
: NULL #nullLiteral
| number #numericLiteral
| booleanValue #booleanLiteral
| STRING+ #stringLiteral
| string #stringLiteral
;

comparisonOperator
Expand All @@ -120,26 +125,17 @@ booleanValue
: TRUE | FALSE
;

qualifiedNames
: qualifiedName (COMMA qualifiedName)*
;

qualifiedName
: (identifier DOT)* identifier
: identifier (DOT identifier | LB INTEGER_VALUE+ RB)*
;

identifier
: quoteIdentifier
| unquoteIdentifier
: IDENTIFIER
| ESCAPED_IDENTIFIER
;

quoteIdentifier
: QUOTED_IDENTIFIER #quotedIdentifier
;

unquoteIdentifier
: IDENTIFIER #unquotedIdentifier
| DIGIT_IDENTIFIER #digitIdentifier
timeUnit
: number unit=IDENTIFIER?
;

number
Expand All @@ -151,31 +147,26 @@ string
: STRING
;

AND: 'AND';
ANY: 'ANY';
ASC: 'ASC';
BETWEEN: 'BETWEEN';
BY: 'BY';
CHILD: 'CHILD';
DESCENDANT: 'DESCENDANT';
EVENT: 'EVENT';
FALSE: 'FALSE';
IN: 'IN';
JOIN: 'JOIN';
MAXSPAN: 'MAXSPAN';
NOT: 'NOT';
NULL: 'NULL';
OF: 'OF';
OR: 'OR';
SEQUENCE: 'SEQUENCE';
TRUE: 'TRUE';
UNTIL: 'UNTIL';
WHERE: 'WHERE';
WITH: 'WITH';
AND: 'and';
BY: 'by';
FALSE: 'false';
FORK: 'fork';
IN: 'in';
JOIN: 'join';
MAXSPAN: 'maxspan';
NOT: 'not';
NULL: 'null';
OF: 'of';
OR: 'or';
SEQUENCE: 'sequence';
TRUE: 'true';
UNTIL: 'until';
WHERE: 'where';
WITH: 'with';

// Operators
EQ : '=' | '==';
NEQ : '<>' | '!=';
NEQ : '!=';
LT : '<';
LTE : '<=';
GT : '>';
Expand All @@ -194,9 +185,16 @@ LP: '(';
RP: ')';
PIPE: '|';


ESCAPED_IDENTIFIER
: '`' (~'`')* '`'
;

STRING
: '\'' ( ~'\'')* '\''
| '"' ( ~'"' )* '"'
: '\'' ('\\' [btnfr"'\\] | ~[\r\n'\\])* '\''
| '"' ('\\' [btnfr"'\\] | ~[\r\n"\\])* '"'
| '?"' ('\\"' |~["\r\n])* '"'
| '?\'' ('\\\'' |~['\r\n])* '\''
;
INTEGER_VALUE
Expand All @@ -210,31 +208,24 @@ DECIMAL_VALUE
| DOT DIGIT+ EXPONENT
;
// make @timestamp not require escaping, since @ has no other meaning
IDENTIFIER
: (LETTER | '_') (LETTER | DIGIT | '_' | '@' )*
;

DIGIT_IDENTIFIER
: DIGIT (LETTER | DIGIT | '_' | '@')+
: (LETTER | '_' | '@') (LETTER | DIGIT | '_')*
;
QUOTED_IDENTIFIER
: '"' ( ~'"' | '""' )* '"'
;

fragment EXPONENT
: 'E' [+-]? DIGIT+
: [Ee] [+-]? DIGIT+
;
fragment DIGIT
: [0-9]
;
fragment LETTER
: [A-Z]
: [A-Za-z]
;
SIMPLE_COMMENT
LINE_COMMENT
: '//' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
;
Expand All @@ -246,9 +237,12 @@ WS
: [ \r\n\t]+ -> channel(HIDDEN)
;
// Catch-all for anything we can't recognize.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
/*
UNRECOGNIZED
: .
;
;
*/
87 changes: 0 additions & 87 deletions x-pack/plugin/eql/src/main/antlr/EqlBase.tokens

This file was deleted.

Loading

0 comments on commit 6f1890b

Please sign in to comment.