Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
[PPL] Create ANTLR parser and implement basic syntax - Phase0 (#429)
Browse files Browse the repository at this point in the history
* m

* Added antlr files

* Added wildcard

* Implemented parser for commands of simple syntax

* addressed comments

* Enabled parsing nested fields and arrays in field

* Added unit tests for nested fields

* Added Java doc for classes

* Replaced the imported parser with static methods in the parser

* Addressed comments

* Addressed comments

* Addressed comments

* changed LogicalPlan to UnresolvedPlan; added test for index name with dots; changed the funcName to String in Function expression

* Reverted nest

* Removed constant and replaced it with literalValue in antlr parser

* Simplified the accept methods

* Update

* Added builder pattern

* Rolled back to remove builder

* update

* Revert "Added builder pattern"

This reverts commit acb1575

* update
  • Loading branch information
chloe-zh authored Apr 23, 2020
1 parent 54049bd commit ded0421
Show file tree
Hide file tree
Showing 31 changed files with 1,956 additions and 71 deletions.
1 change: 1 addition & 0 deletions common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ repositories {

dependencies {
compile "org.antlr:antlr4-runtime:4.7.1"
compile group: 'com.google.guava', name: 'guava', version:'15.0'

testCompile group: 'junit', name: 'junit', version: '4.12'
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.common.utils;

import com.google.common.base.Strings;

public class StringUtils {

    /** Quotation marks recognised by {@link #unquoteIdentifier(String)}, in the order they are tried. */
    private static final String[] QUOTE_MARKS = {"\"", "'", "`"};

    /**
     * Removes one enclosing pair of the given quotation mark from a string.
     *
     * <p>The text counts as quoted only when it starts and ends with {@code mark} and is long
     * enough to hold an opening and a closing mark that do not overlap. Any other input,
     * including {@code null}, the empty string and a string that is just a single mark, is
     * returned unchanged.
     *
     * @param text string
     * @param mark quotation mark
     * @return An unquoted string whose outer pair of quotation marks has been removed,
     *         or {@code text} itself when it is not quoted with {@code mark}
     */
    public static String unquoteIdentifier(String text, String mark) {
        if (isQuoted(text, mark)) {
            return text.substring(mark.length(), text.length() - mark.length());
        }
        return text;
    }

    /**
     * Removes one enclosing pair of single quotes, double quotes or back-ticks from a string.
     *
     * <p>Only a matching pair is removed: the same mark must open and close the text. Input that
     * is not quoted this way, including {@code null}, the empty string and a lone quotation
     * mark, is returned unchanged.
     *
     * @param text string
     * @return An unquoted string whose outer pair of (single/double/back-tick) quotes have been
     *         removed, or {@code text} itself when it is not quoted
     */
    public static String unquoteIdentifier(String text) {
        for (String mark : QUOTE_MARKS) {
            if (isQuoted(text, mark)) {
                return unquoteIdentifier(text, mark);
            }
        }
        return text;
    }

    /**
     * Tells whether {@code text} is enclosed in a pair of {@code mark}.
     *
     * @param text string to inspect, may be {@code null}
     * @param mark quotation mark
     * @return {@code true} when text is non-empty, starts and ends with the mark, and the two
     *         marks do not overlap
     */
    private static boolean isQuoted(String text, String mark) {
        return text != null
            && !text.isEmpty()
            // Without this guard a lone mark such as "\"" both starts and ends with the mark,
            // and the caller's substring(begin, end) would be asked for begin > end.
            && text.length() >= 2 * mark.length()
            && text.startsWith(mark)
            && text.endsWith(mark);
    }
}
2 changes: 2 additions & 0 deletions ppl/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ dependencies {
antlr "org.antlr:antlr4:4.7.1"

compile "org.antlr:antlr4-runtime:4.7.1"
compile group: 'com.google.guava', name: 'guava', version:'15.0'
compile group: 'org.elasticsearch', name: 'elasticsearch-x-content', version:"${es_version}"
compile group: 'org.json', name: 'json', version:'20180813'
compile group: 'org.springframework', name: 'spring-context', version: '5.2.5.RELEASE'
compile group: 'org.springframework', name: 'spring-beans', version: '5.2.5.RELEASE'
Expand Down
18 changes: 11 additions & 7 deletions ppl/src/main/antlr/OpenDistroPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ EXCLAMATION_SYMBOL: '!';
COLON: ':';
LT_PRTHS: '(';
RT_PRTHS: ')';
LT_SQR_PRTHS: '[';
RT_SQR_PRTHS: ']';
SINGLE_QUOTE: '\'';
DOUBLE_QUOTE: '"';
BACKTICK: '`';

// AGGREGATIONS
AVG: 'AVG';
Expand Down Expand Up @@ -240,16 +245,15 @@ WILDCARDQUERY: 'WILDCARDQUERY';
WILDCARD_QUERY: 'WILDCARD_QUERY';

// LITERALS AND VALUES
STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING;
//STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING;
ID: ID_LITERAL;
DOT_ID: '.' ID;
DECIMAL_LITERAL: DEC_DIGIT+;
// TODO: define WCFIELD
INTEGER_LITERAL: DEC_DIGIT+;
DECIMAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+;

fragment ID_LITERAL: [A-Z_$0-9@]*?[A-Z_$\-]+?[A-Z_$\-0-9]*;
fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';
fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'';
fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';
SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'';
BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
fragment DEC_DIGIT: [0-9];


Expand Down
108 changes: 48 additions & 60 deletions ppl/src/main/antlr/OpenDistroPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,31 @@ pplStatement
: searchCommand (PIPE commands)*
;


/** commands */
commands
: whereCommand | fieldsCommand | renameCommand | statsCommand | dedupCommand | sortCommand | evalCommand
;

searchCommand
: (SEARCH)? fromClause #searchWithoutFilter
| (SEARCH)? fromClause logicalExpression #searchFromClauseLogicExpr
| (SEARCH)? logicalExpression fromClause #searchLogicExprFromClause
: (SEARCH)? fromClause #searchFrom
| (SEARCH)? fromClause logicalExpression #searchFromFilter
| (SEARCH)? logicalExpression fromClause #searchFilterFrom
;

whereCommand
: WHERE evalExpression
: WHERE logicalExpression
;

fieldsCommand
: FIELDS fieldList
: FIELDS (PLUS | MINUS)? wcFieldList
;

renameCommand
: RENAME wcField AS wcField
: RENAME orignalField=wcFieldExpression AS renamedField=wcFieldExpression
;

statsCommand
: STATS (statsArgument)* (statsAggTerm | sparklineAggTerm) (byClause)? (dedupSplitValues)?
: STATS (statsArgument)* statsAggTerm (byClause)? (dedupSplitValues)?
;

dedupCommand
Expand All @@ -63,7 +62,7 @@ sortCommand
;

evalCommand
: EVAL fieldExpression EQUAL evalExpression (COMMA fieldExpression EQUAL evalExpression)
: EVAL evalExpression (COMMA evalExpression)*
;

/** arguments */
Expand Down Expand Up @@ -94,16 +93,12 @@ byClause
;

sortbyClause
: (PLUS | MINUS) sortField (COMMA (PLUS | MINUS) sortField)*
: (PLUS | MINUS)? sortField (COMMA (PLUS | MINUS)? sortField)*
;

/** aggregation terms */
aggregationTerm
: statsAggTerm | sparklineAggTerm
;

statsAggTerm
: statsFunction LT_PRTHS (fieldExpression)? RT_PRTHS
: statsFunction
;

sparklineAggTerm
Expand All @@ -116,8 +111,8 @@ statsFunction
;

aggregationFunction
: aggFunctionName LT_PRTHS fieldExpression RT_PRTHS
| percentileAggFunction
: aggFunctionName LT_PRTHS fieldExpression RT_PRTHS #aggFunctionCall
| percentileAggFunction #percentileAggFunctionCall
;

aggFunctionName
Expand All @@ -126,7 +121,7 @@ aggFunctionName
;

percentileAggFunction
: PERCENTILE LESS fieldExpression GREATER LT_PRTHS fieldExpression RT_PRTHS
: PERCENTILE LESS value=decimalLiteral GREATER LT_PRTHS aggField=fieldExpression RT_PRTHS
;

eventOrderFunction
Expand All @@ -148,16 +143,17 @@ timeFunctionName
;

sparklineAggregation
: SPARKLINE LT_PRTHS COUNT LT_PRTHS wcField RT_PRTHS COMMA spanLength=decimalLiteral RT_PRTHS
| SPARKLINE RT_PRTHS sparklineFunction LT_PRTHS wcField RT_PRTHS COMMA spanLength=decimalLiteral RT_PRTHS
: SPARKLINE LT_PRTHS COUNT LT_PRTHS wcFieldExpression RT_PRTHS COMMA spanLength=decimalLiteral RT_PRTHS
| SPARKLINE RT_PRTHS sparklineFunction LT_PRTHS wcFieldExpression RT_PRTHS COMMA spanLength=decimalLiteral RT_PRTHS
;

sparklineFunction
: sparklineFunctionName LT_PRTHS fieldExpression RT_PRTHS
;

sparklineFunctionName
: C | COUNT | DC | MEAN | AVG | STDEV | STDEVP | VAR | VARP | SUM | SUMSQ | MIN | MAX | RANGE
:
// | C | COUNT | DC | MEAN | AVG | STDEV | STDEVP | VAR | VARP | SUM | SUMSQ | MIN | MAX | RANGE
;

/** expressions */
Expand All @@ -173,35 +169,39 @@ logicalExpression
| comparisonExpression #comparsion
| evalExpression #eval
| NOT logicalExpression #logicalNot
| left=logicalExpression OR right=logicalExpression #logicalOrBinary
| left=logicalExpression (AND)? right=logicalExpression #logicalAndBinary
| left=logicalExpression OR right=logicalExpression #logicalOr
| left=logicalExpression (AND)? right=logicalExpression #logicalAnd
;

evalExpression
: literalValue EQUAL literalValue
| evalFunctionCall
: fieldExpression EQUAL evalFunctionCall
;

comparisonExpression
: left=fieldExpression comparisonOperator right=literalValue
| fieldExpression IN valueList
: left=fieldExpression comparisonOperator right=literalValue #compareExpr
| fieldExpression IN valueList #inExpr
;

booleanExpression
: LT_PRTHS booleanExpression RT_PRTHS
: LT_PRTHS booleanLiteral RT_PRTHS
| booleanLiteral
;

/** tables */
tableSource
: ident
| stringLiteral
;

/** fields */
fieldList
: fieldExpression (COMMA fieldExpression)*
;

wcFieldList
: wcFieldExpression (COMMA wcFieldExpression)*
;

sortField
: fieldExpression #defaultSort
| AUTO LT_PRTHS fieldExpression RT_PRTHS #autoSort
Expand All @@ -210,14 +210,18 @@ sortField
| NUM LT_PRTHS fieldExpression RT_PRTHS #numSort
;

wcField
: WCFIELD
;

fieldExpression
: ident
| SINGLE_QUOTE ident SINGLE_QUOTE
| DOUBLE_QUOTE ident DOUBLE_QUOTE
| BACKTICK ident BACKTICK
;

wcFieldExpression
: wildcard
;


/** functions */
evalFunctionCall
: evalFunctionName LT_PRTHS functionArgs RT_PRTHS
Expand Down Expand Up @@ -253,7 +257,7 @@ functionArgs
;

functionArg
: constant | fullColumnName | expression | evalFunctionCall
: expression | fieldExpression | literalValue
;

/** operators */
Expand All @@ -264,11 +268,17 @@ comparisonOperator
/** literals and values*/
literalValue
: stringLiteral
| decimalLiteral
| (PLUS | MINUS)? integerLiteral
| (PLUS | MINUS)? decimalLiteral
| booleanLiteral
;

stringLiteral
: STRING_LITERAL
: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING
;

integerLiteral
: INTEGER_LITERAL
;

decimalLiteral
Expand All @@ -283,32 +293,10 @@ valueList
: LT_PRTHS literalValue (COMMA literalValue)* RT_PRTHS
;

fullColumnName
: simpleId DOT_ID*
;

constant
: stringLiteral | decimalLiteral
| MINUS decimalLiteral
| booleanLiteral
;

simpleId
: ID
| DOT_ID
| STRING_LITERAL
;

ident
: ID
| DOT_ID
: (DOT)? ID
;

/** dataset */
datasetType
: DATAMODEL | LOOKUP | SAVEDSEARCH
;

datasetName
:
wildcard
: (MODULE | ident)+
;
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
* PPL Syntax Parser
*/
public class PPLSyntaxParser {
public ParseTree analyzeSyntax(String sql) {
OpenDistroPPLParser parser = createParser(createLexer(sql));
public ParseTree analyzeSyntax(String query) {
OpenDistroPPLParser parser = createParser(createLexer(query));
parser.addErrorListener(new SyntaxAnalysisErrorListener());
return parser.root();
}
Expand All @@ -38,8 +38,8 @@ private OpenDistroPPLParser createParser(Lexer lexer) {
new CommonTokenStream(lexer));
}

private OpenDistroPPLLexer createLexer(String sql) {
private OpenDistroPPLLexer createLexer(String query) {
return new OpenDistroPPLLexer(
new CaseInsensitiveCharStream(sql));
new CaseInsensitiveCharStream(query));
}
}
Loading

0 comments on commit ded0421

Please sign in to comment.