Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

[PPL] Create ANTLR parser and implement basic syntax - Phase0 #429

Merged
merged 26 commits into from
Apr 23, 2020
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
8a44cfc
m
chloe-zh Apr 6, 2020
204bb81
Merge branch 'feature/ppl' of github.com:opendistro-for-elasticsearch…
chloe-zh Apr 9, 2020
3bfdb28
Added antlr files
chloe-zh Apr 9, 2020
c2f63c0
Merge remote-tracking branch 'upstream/feature/ppl' into ppl-parser
chloe-zh Apr 10, 2020
084f380
Added wildcard
chloe-zh Apr 12, 2020
8c45b24
Implemented parser for commands of simple syntax
chloe-zh Apr 14, 2020
6b542eb
addressed comments
chloe-zh Apr 14, 2020
46bb1dc
Enabled parsing nested fields and arrays in field
chloe-zh Apr 16, 2020
80b0629
Added unit tests for nested fields
chloe-zh Apr 17, 2020
ddf9c5a
Added Java doc for classes
chloe-zh Apr 17, 2020
17a8503
Replaced the imported parser with static methods in the parser
chloe-zh Apr 17, 2020
784df88
Addressed comments
chloe-zh Apr 17, 2020
c251f03
Addressed comments
chloe-zh Apr 17, 2020
3a92f11
Addressed comments
chloe-zh Apr 18, 2020
412f6f0
changed LogicalPlan to UnresolvedPlan; added test for index name with…
chloe-zh Apr 20, 2020
5430902
Merge branch 'feature/ppl' of github.com:opendistro-for-elasticsearch…
chloe-zh Apr 20, 2020
bd56129
Reverted nest
chloe-zh Apr 21, 2020
9489e41
Removed constant and replaced it with literalValue in antlr parser
chloe-zh Apr 21, 2020
a898546
Simplified the accept methods
chloe-zh Apr 22, 2020
daf4767
Update
chloe-zh Apr 22, 2020
acb1575
Added builder pattern
chloe-zh Apr 23, 2020
2cc689b
Rolled back to remove builder
chloe-zh Apr 23, 2020
42609bc
update
chloe-zh Apr 23, 2020
092adbf
Revert "Added builder pattern"
chloe-zh Apr 23, 2020
e8c2eff
update
chloe-zh Apr 23, 2020
0c78efa
Merge remote-tracking branch 'upstream/feature/ppl' into ppl-parser
chloe-zh Apr 23, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ repositories {

// External libraries for this module. `compile` entries are on the classpath of
// the main sources; `testCompile` entries are only on the test classpath.
dependencies {
// ANTLR 4 runtime library.
compile "org.antlr:antlr4-runtime:4.7.1"
// Google Guava utility library.
compile group: 'com.google.guava', name: 'guava', version:'15.0'

// JUnit 4, used by tests only.
testCompile group: 'junit', name: 'junit', version: '4.12'
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.common.utils;

import com.google.common.base.Strings;

/**
 * Static helpers for stripping quotation marks from identifiers.
 */
public class StringUtils {
    /** Quotation marks recognised by {@link #unquoteIdentifier(String)}: double, single, back-tick. */
    private static final String[] QUOTE_MARKS = {"\"", "'", "`"};

    /**
     * Removes the outer pair of the given quotation mark from a string.
     *
     * @param text string, may be {@code null}
     * @param mark quotation mark that is expected at both ends of {@code text}
     * @return {@code text} without its leading and trailing {@code mark} when it is wrapped in
     *         a complete pair of them; otherwise {@code text} unchanged (including {@code null})
     */
    public static String unquoteIdentifier(String text, String mark) {
        if (isQuoted(text, mark)) {
            return text.substring(mark.length(), text.length() - mark.length());
        }
        return text;
    }

    /**
     * Removes the outer pair of single, double or back-tick quotes from a string.
     * The opening and closing quote must be the same character.
     *
     * @param text string, may be {@code null}
     * @return An unquoted string whose outer pair of (single/double/back-tick) quotes have been
     *         removed, or {@code text} unchanged when it is not wrapped in such a pair
     */
    public static String unquoteIdentifier(String text) {
        for (String mark : QUOTE_MARKS) {
            if (isQuoted(text, mark)) {
                return text.substring(mark.length(), text.length() - mark.length());
            }
        }
        return text;
    }

    /**
     * Tells whether {@code text} starts and ends with {@code mark} as two separate occurrences.
     *
     * @param text string to inspect, may be {@code null}
     * @param mark quotation mark
     * @return {@code true} only if {@code text} is non-null and wrapped in a full pair of marks
     */
    private static boolean isQuoted(String text, String mark) {
        // The length guard rejects inputs such as a lone quote character, where the same
        // characters would otherwise count as both the opening and the closing mark and
        // the subsequent substring call would fail with begin > end.
        return text != null
            && text.length() >= 2 * mark.length()
            && text.startsWith(mark)
            && text.endsWith(mark);
    }
}
2 changes: 2 additions & 0 deletions ppl/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ dependencies {
antlr "org.antlr:antlr4:4.7.1"

compile "org.antlr:antlr4-runtime:4.7.1"
compile group: 'com.google.guava', name: 'guava', version:'15.0'
compile group: 'org.elasticsearch', name: 'elasticsearch-x-content', version:"${es_version}"
compile group: 'org.json', name: 'json', version:'20180813'
compile group: 'org.springframework', name: 'spring-context', version: '5.2.5.RELEASE'
compile group: 'org.springframework', name: 'spring-beans', version: '5.2.5.RELEASE'
Expand Down
18 changes: 11 additions & 7 deletions ppl/src/main/antlr/OpenDistroPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ EXCLAMATION_SYMBOL: '!';
COLON: ':';
LT_PRTHS: '(';
RT_PRTHS: ')';
LT_SQR_PRTHS: '[';
RT_SQR_PRTHS: ']';
SINGLE_QUOTE: '\'';
DOUBLE_QUOTE: '"';
BACKTICK: '`';

// AGGREGATIONS
AVG: 'AVG';
Expand Down Expand Up @@ -240,16 +245,15 @@ WILDCARDQUERY: 'WILDCARDQUERY';
WILDCARD_QUERY: 'WILDCARD_QUERY';

// LITERALS AND VALUES
STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING;
//STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING;
ID: ID_LITERAL;
DOT_ID: '.' ID;
DECIMAL_LITERAL: DEC_DIGIT+;
// TODO: define WCFIELD
INTEGER_LITERAL: DEC_DIGIT+;
DECIMAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+;

fragment ID_LITERAL: [A-Z_$0-9@]*?[A-Z_$\-]+?[A-Z_$\-0-9]*;
fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';
fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'';
fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';
SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'';
BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
fragment DEC_DIGIT: [0-9];


Expand Down
108 changes: 48 additions & 60 deletions ppl/src/main/antlr/OpenDistroPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,31 @@ pplStatement
: searchCommand (PIPE commands)*
;


/** commands */
commands
: whereCommand | fieldsCommand | renameCommand | statsCommand | dedupCommand | sortCommand | evalCommand
;

searchCommand
: (SEARCH)? fromClause #searchWithoutFilter
| (SEARCH)? fromClause logicalExpression #searchFromClauseLogicExpr
| (SEARCH)? logicalExpression fromClause #searchLogicExprFromClause
: (SEARCH)? fromClause #searchFrom
chloe-zh marked this conversation as resolved.
Show resolved Hide resolved
| (SEARCH)? fromClause logicalExpression #searchFromFilter
| (SEARCH)? logicalExpression fromClause #searchFilterFrom
;

whereCommand
: WHERE evalExpression
: WHERE logicalExpression
;

fieldsCommand
: FIELDS fieldList
: FIELDS (PLUS | MINUS)? wcFieldList
;

renameCommand
: RENAME wcField AS wcField
: RENAME orignalField=wcFieldExpression AS renamedField=wcFieldExpression
;

statsCommand
: STATS (statsArgument)* (statsAggTerm | sparklineAggTerm) (byClause)? (dedupSplitValues)?
: STATS (statsArgument)* statsAggTerm (byClause)? (dedupSplitValues)?
;

dedupCommand
Expand All @@ -63,7 +62,7 @@ sortCommand
;

evalCommand
: EVAL fieldExpression EQUAL evalExpression (COMMA fieldExpression EQUAL evalExpression)
: EVAL evalExpression (COMMA evalExpression)*
;

/** arguments */
Expand Down Expand Up @@ -94,16 +93,12 @@ byClause
;

sortbyClause
: (PLUS | MINUS) sortField (COMMA (PLUS | MINUS) sortField)*
: (PLUS | MINUS)? sortField (COMMA (PLUS | MINUS)? sortField)*
;

/** aggregation terms */
aggregationTerm
: statsAggTerm | sparklineAggTerm
;

statsAggTerm
: statsFunction LT_PRTHS (fieldExpression)? RT_PRTHS
: statsFunction
;

sparklineAggTerm
Expand All @@ -116,8 +111,8 @@ statsFunction
;

aggregationFunction
: aggFunctionName LT_PRTHS fieldExpression RT_PRTHS
| percentileAggFunction
: aggFunctionName LT_PRTHS fieldExpression RT_PRTHS #aggFunctionCall
| percentileAggFunction #percentileAggFunctionCall
;

aggFunctionName
Expand All @@ -126,7 +121,7 @@ aggFunctionName
;

percentileAggFunction
: PERCENTILE LESS fieldExpression GREATER LT_PRTHS fieldExpression RT_PRTHS
: PERCENTILE LESS value=decimalLiteral GREATER LT_PRTHS aggField=fieldExpression RT_PRTHS
;

eventOrderFunction
Expand All @@ -148,16 +143,17 @@ timeFunctionName
;

sparklineAggregation
: SPARKLINE LT_PRTHS COUNT LT_PRTHS wcField RT_PRTHS COMMA spanLength=decimalLiteral RT_PRTHS
| SPARKLINE RT_PRTHS sparklineFunction LT_PRTHS wcField RT_PRTHS COMMA spanLength=decimalLiteral RT_PRTHS
: SPARKLINE LT_PRTHS COUNT LT_PRTHS wcFieldExpression RT_PRTHS COMMA spanLength=decimalLiteral RT_PRTHS
| SPARKLINE RT_PRTHS sparklineFunction LT_PRTHS wcFieldExpression RT_PRTHS COMMA spanLength=decimalLiteral RT_PRTHS
;

sparklineFunction
: sparklineFunctionName LT_PRTHS fieldExpression RT_PRTHS
;

sparklineFunctionName
: C | COUNT | DC | MEAN | AVG | STDEV | STDEVP | VAR | VARP | SUM | SUMSQ | MIN | MAX | RANGE
:
// | C | COUNT | DC | MEAN | AVG | STDEV | STDEVP | VAR | VARP | SUM | SUMSQ | MIN | MAX | RANGE
;

/** expressions */
Expand All @@ -173,35 +169,39 @@ logicalExpression
| comparisonExpression #comparsion
| evalExpression #eval
| NOT logicalExpression #logicalNot
| left=logicalExpression OR right=logicalExpression #logicalOrBinary
| left=logicalExpression (AND)? right=logicalExpression #logicalAndBinary
| left=logicalExpression OR right=logicalExpression #logicalOr
| left=logicalExpression (AND)? right=logicalExpression #logicalAnd
;

evalExpression
: literalValue EQUAL literalValue
| evalFunctionCall
: fieldExpression EQUAL evalFunctionCall
;

comparisonExpression
: left=fieldExpression comparisonOperator right=literalValue
| fieldExpression IN valueList
: left=fieldExpression comparisonOperator right=literalValue #compareExpr
| fieldExpression IN valueList #inExpr
;

booleanExpression
: LT_PRTHS booleanExpression RT_PRTHS
: LT_PRTHS booleanLiteral RT_PRTHS
| booleanLiteral
;

/** tables */
tableSource
: ident
| stringLiteral
;

/** fields */
fieldList
: fieldExpression (COMMA fieldExpression)*
;

wcFieldList
: wcFieldExpression (COMMA wcFieldExpression)*
;

sortField
: fieldExpression #defaultSort
| AUTO LT_PRTHS fieldExpression RT_PRTHS #autoSort
Expand All @@ -210,14 +210,18 @@ sortField
| NUM LT_PRTHS fieldExpression RT_PRTHS #numSort
;

wcField
: WCFIELD
;

fieldExpression
: ident
| SINGLE_QUOTE ident SINGLE_QUOTE
| DOUBLE_QUOTE ident DOUBLE_QUOTE
| BACKTICK ident BACKTICK
;

wcFieldExpression
: wildcard
;


/** functions */
evalFunctionCall
: evalFunctionName LT_PRTHS functionArgs RT_PRTHS
Expand Down Expand Up @@ -253,7 +257,7 @@ functionArgs
;

functionArg
: constant | fullColumnName | expression | evalFunctionCall
: expression | fieldExpression | literalValue
;

/** operators */
Expand All @@ -264,11 +268,17 @@ comparisonOperator
/** literals and values*/
literalValue
: stringLiteral
| decimalLiteral
| (PLUS | MINUS)? integerLiteral
| (PLUS | MINUS)? decimalLiteral
| booleanLiteral
;

stringLiteral
: STRING_LITERAL
: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING
;

integerLiteral
: INTEGER_LITERAL
;

decimalLiteral
Expand All @@ -283,32 +293,10 @@ valueList
: LT_PRTHS literalValue (COMMA literalValue)* RT_PRTHS
;

fullColumnName
: simpleId DOT_ID*
;

constant
: stringLiteral | decimalLiteral
| MINUS decimalLiteral
| booleanLiteral
;

simpleId
: ID
| DOT_ID
| STRING_LITERAL
;

ident
: ID
| DOT_ID
: (DOT)? ID
;

/** dataset */
datasetType
: DATAMODEL | LOOKUP | SAVEDSEARCH
;

datasetName
:
wildcard
: (MODULE | ident)+
;
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
* PPL Syntax Parser
*/
public class PPLSyntaxParser {
public ParseTree analyzeSyntax(String sql) {
OpenDistroPPLParser parser = createParser(createLexer(sql));
public ParseTree analyzeSyntax(String query) {
OpenDistroPPLParser parser = createParser(createLexer(query));
parser.addErrorListener(new SyntaxAnalysisErrorListener());
return parser.root();
}
Expand All @@ -38,8 +38,8 @@ private OpenDistroPPLParser createParser(Lexer lexer) {
new CommonTokenStream(lexer));
}

private OpenDistroPPLLexer createLexer(String sql) {
private OpenDistroPPLLexer createLexer(String query) {
return new OpenDistroPPLLexer(
new CaseInsensitiveCharStream(sql));
new CaseInsensitiveCharStream(query));
}
}
Loading