From 17f2a869051084a419dbd63d3b45f4ee8c59162f Mon Sep 17 00:00:00 2001 From: Tomoyuki Morita Date: Mon, 25 Nov 2024 21:29:28 -0800 Subject: [PATCH] Add grammar validation for PPL Signed-off-by: Tomoyuki Morita --- async-query-core/build.gradle | 4 +- .../src/main/antlr/OpenSearchPPLLexer.g4 | 496 +++++++ .../src/main/antlr/OpenSearchPPLParser.g4 | 1174 +++++++++++++++++ .../dispatcher/SparkQueryDispatcher.java | 7 +- .../CWLPPLGrammarElementValidator.java | 45 + .../sql/spark/validator/GrammarElement.java | 85 +- .../spark/validator/PPLGrammarElement.java | 31 + .../validator/PPLQueryValidationVisitor.java | 87 ++ .../spark/validator/PPLQueryValidator.java | 50 + .../S3GlueGrammarElementValidator.java | 83 -- .../S3GlueSQLGrammarElementValidator.java | 83 ++ .../spark/validator/SQLGrammarElement.java | 91 ++ .../validator/SQLQueryValidationVisitor.java | 168 +-- .../SecurityLakeGrammarElementValidator.java | 123 -- ...ecurityLakeSQLGrammarElementValidator.java | 123 ++ .../asyncquery/AsyncQueryCoreIntegTest.java | 12 +- .../dispatcher/SparkQueryDispatcherTest.java | 19 +- .../GrammarElementValidatorProviderTest.java | 15 +- .../validator/PPLQueryValidatorTest.java | 167 +++ .../validator/SQLQueryValidatorTest.java | 4 +- .../config/AsyncExecutorServiceModule.java | 29 +- .../AsyncQueryExecutorServiceSpec.java | 12 +- 22 files changed, 2505 insertions(+), 403 deletions(-) create mode 100644 async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 create mode 100644 async-query-core/src/main/antlr/OpenSearchPPLParser.g4 create mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/CWLPPLGrammarElementValidator.java create mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLGrammarElement.java create mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidationVisitor.java create mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidator.java delete 
mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueGrammarElementValidator.java create mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueSQLGrammarElementValidator.java create mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLGrammarElement.java delete mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeGrammarElementValidator.java create mode 100644 async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeSQLGrammarElementValidator.java create mode 100644 async-query-core/src/test/java/org/opensearch/sql/spark/validator/PPLQueryValidatorTest.java diff --git a/async-query-core/build.gradle b/async-query-core/build.gradle index 330b418681..37bf6748c9 100644 --- a/async-query-core/build.gradle +++ b/async-query-core/build.gradle @@ -21,12 +21,14 @@ tasks.register('downloadG4Files', Exec) { executable 'curl' - def opensearchSparkBranch = "0.5" + def opensearchSparkBranch = "0.6" def apacheSparkVersionTag = "v3.5.1" args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4" args '-o', 'src/main/antlr/SparkSqlBase.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4" args '-o', 'src/main/antlr/SqlBaseParser.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkVersionTag}/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4" args '-o', 'src/main/antlr/SqlBaseLexer.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkVersionTag}/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4" + args '-o', 'src/main/antlr/OpenSearchPPLParser.g4', 
"https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4" + args '-o', 'src/main/antlr/OpenSearchPPLLexer.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4" } generateGrammarSource { diff --git a/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 b/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 new file mode 100644 index 0000000000..cb323f7942 --- /dev/null +++ b/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 @@ -0,0 +1,496 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +lexer grammar OpenSearchPPLLexer; + +channels { WHITESPACE, ERRORCHANNEL } + + +// COMMAND KEYWORDS +SEARCH: 'SEARCH'; +DESCRIBE: 'DESCRIBE'; +SHOW: 'SHOW'; +FROM: 'FROM'; +WHERE: 'WHERE'; +FIELDS: 'FIELDS'; +RENAME: 'RENAME'; +STATS: 'STATS'; +EVENTSTATS: 'EVENTSTATS'; +DEDUP: 'DEDUP'; +SORT: 'SORT'; +EVAL: 'EVAL'; +HEAD: 'HEAD'; +TOP: 'TOP'; +RARE: 'RARE'; +PARSE: 'PARSE'; +METHOD: 'METHOD'; +REGEX: 'REGEX'; +PUNCT: 'PUNCT'; +GROK: 'GROK'; +PATTERN: 'PATTERN'; +PATTERNS: 'PATTERNS'; +NEW_FIELD: 'NEW_FIELD'; +KMEANS: 'KMEANS'; +AD: 'AD'; +ML: 'ML'; +FILLNULL: 'FILLNULL'; +EXPAND: 'EXPAND'; +FLATTEN: 'FLATTEN'; +TRENDLINE: 'TRENDLINE'; + +//Native JOIN KEYWORDS +JOIN: 'JOIN'; +ON: 'ON'; +INNER: 'INNER'; +OUTER: 'OUTER'; +FULL: 'FULL'; +SEMI: 'SEMI'; +ANTI: 'ANTI'; +CROSS: 'CROSS'; +LEFT_HINT: 'HINT.LEFT'; +RIGHT_HINT: 'HINT.RIGHT'; + +//CORRELATION KEYWORDS +CORRELATE: 'CORRELATE'; +SELF: 'SELF'; +EXACT: 'EXACT'; +APPROXIMATE: 'APPROXIMATE'; +SCOPE: 'SCOPE'; +MAPPING: 'MAPPING'; + +//EXPLAIN KEYWORDS +EXPLAIN: 'EXPLAIN'; +FORMATTED: 'FORMATTED'; +COST: 'COST'; +CODEGEN: 'CODEGEN'; +EXTENDED: 'EXTENDED'; +SIMPLE: 'SIMPLE'; + +// COMMAND ASSIST KEYWORDS +AS: 'AS'; +BY: 'BY'; +SOURCE: 'SOURCE'; +INDEX: 'INDEX'; +D: 'D'; +DESC: 
'DESC'; +DATASOURCES: 'DATASOURCES'; +USING: 'USING'; +WITH: 'WITH'; + +// FIELD KEYWORDS +AUTO: 'AUTO'; +STR: 'STR'; +IP: 'IP'; +NUM: 'NUM'; + + +// FIELDSUMMARY keywords +FIELDSUMMARY: 'FIELDSUMMARY'; +INCLUDEFIELDS: 'INCLUDEFIELDS'; +NULLS: 'NULLS'; + +//TRENDLINE KEYWORDS +SMA: 'SMA'; +WMA: 'WMA'; + +// ARGUMENT KEYWORDS +KEEPEMPTY: 'KEEPEMPTY'; +CONSECUTIVE: 'CONSECUTIVE'; +DEDUP_SPLITVALUES: 'DEDUP_SPLITVALUES'; +PARTITIONS: 'PARTITIONS'; +ALLNUM: 'ALLNUM'; +DELIM: 'DELIM'; +CENTROIDS: 'CENTROIDS'; +ITERATIONS: 'ITERATIONS'; +DISTANCE_TYPE: 'DISTANCE_TYPE'; +NUMBER_OF_TREES: 'NUMBER_OF_TREES'; +SHINGLE_SIZE: 'SHINGLE_SIZE'; +SAMPLE_SIZE: 'SAMPLE_SIZE'; +OUTPUT_AFTER: 'OUTPUT_AFTER'; +TIME_DECAY: 'TIME_DECAY'; +ANOMALY_RATE: 'ANOMALY_RATE'; +CATEGORY_FIELD: 'CATEGORY_FIELD'; +TIME_FIELD: 'TIME_FIELD'; +TIME_ZONE: 'TIME_ZONE'; +TRAINING_DATA_SIZE: 'TRAINING_DATA_SIZE'; +ANOMALY_SCORE_THRESHOLD: 'ANOMALY_SCORE_THRESHOLD'; +APPEND: 'APPEND'; + +// COMPARISON FUNCTION KEYWORDS +CASE: 'CASE'; +ELSE: 'ELSE'; +IN: 'IN'; +EXISTS: 'EXISTS'; + +// LOGICAL KEYWORDS +NOT: 'NOT'; +OR: 'OR'; +AND: 'AND'; +XOR: 'XOR'; +TRUE: 'TRUE'; +FALSE: 'FALSE'; +REGEXP: 'REGEXP'; + +// DATETIME, INTERVAL AND UNIT KEYWORDS +CONVERT_TZ: 'CONVERT_TZ'; +DATETIME: 'DATETIME'; +DAY: 'DAY'; +DAY_HOUR: 'DAY_HOUR'; +DAY_MICROSECOND: 'DAY_MICROSECOND'; +DAY_MINUTE: 'DAY_MINUTE'; +DAY_OF_YEAR: 'DAY_OF_YEAR'; +DAY_SECOND: 'DAY_SECOND'; +HOUR: 'HOUR'; +HOUR_MICROSECOND: 'HOUR_MICROSECOND'; +HOUR_MINUTE: 'HOUR_MINUTE'; +HOUR_OF_DAY: 'HOUR_OF_DAY'; +HOUR_SECOND: 'HOUR_SECOND'; +INTERVAL: 'INTERVAL'; +MICROSECOND: 'MICROSECOND'; +MILLISECOND: 'MILLISECOND'; +MINUTE: 'MINUTE'; +MINUTE_MICROSECOND: 'MINUTE_MICROSECOND'; +MINUTE_OF_DAY: 'MINUTE_OF_DAY'; +MINUTE_OF_HOUR: 'MINUTE_OF_HOUR'; +MINUTE_SECOND: 'MINUTE_SECOND'; +MONTH: 'MONTH'; +MONTH_OF_YEAR: 'MONTH_OF_YEAR'; +QUARTER: 'QUARTER'; +SECOND: 'SECOND'; +SECOND_MICROSECOND: 'SECOND_MICROSECOND'; +SECOND_OF_MINUTE: 'SECOND_OF_MINUTE'; +WEEK: 'WEEK'; 
+WEEK_OF_YEAR: 'WEEK_OF_YEAR'; +YEAR: 'YEAR'; +YEAR_MONTH: 'YEAR_MONTH'; + +// DATASET TYPES +DATAMODEL: 'DATAMODEL'; +LOOKUP: 'LOOKUP'; +SAVEDSEARCH: 'SAVEDSEARCH'; + +// CONVERTED DATA TYPES +INT: 'INT'; +INTEGER: 'INTEGER'; +DOUBLE: 'DOUBLE'; +LONG: 'LONG'; +FLOAT: 'FLOAT'; +STRING: 'STRING'; +BOOLEAN: 'BOOLEAN'; + +// SPECIAL CHARACTERS AND OPERATORS +PIPE: '|'; +COMMA: ','; +DOT: '.'; +EQUAL: '='; +GREATER: '>'; +LESS: '<'; +NOT_GREATER: '<' '='; +NOT_LESS: '>' '='; +NOT_EQUAL: '!' '='; +PLUS: '+'; +MINUS: '-'; +STAR: '*'; +DIVIDE: '/'; +MODULE: '%'; +EXCLAMATION_SYMBOL: '!'; +COLON: ':'; +LT_PRTHS: '('; +RT_PRTHS: ')'; +LT_SQR_PRTHS: '['; +RT_SQR_PRTHS: ']'; +SINGLE_QUOTE: '\''; +DOUBLE_QUOTE: '"'; +BACKTICK: '`'; +ARROW: '->'; + +// Operators. Bit + +BIT_NOT_OP: '~'; +BIT_AND_OP: '&'; +BIT_XOR_OP: '^'; + +// AGGREGATIONS +AVG: 'AVG'; +COUNT: 'COUNT'; +DISTINCT_COUNT: 'DISTINCT_COUNT'; +ESTDC: 'ESTDC'; +ESTDC_ERROR: 'ESTDC_ERROR'; +MAX: 'MAX'; +MEAN: 'MEAN'; +MEDIAN: 'MEDIAN'; +MIN: 'MIN'; +MODE: 'MODE'; +RANGE: 'RANGE'; +STDEV: 'STDEV'; +STDEVP: 'STDEVP'; +SUM: 'SUM'; +SUMSQ: 'SUMSQ'; +VAR_SAMP: 'VAR_SAMP'; +VAR_POP: 'VAR_POP'; +STDDEV_SAMP: 'STDDEV_SAMP'; +STDDEV_POP: 'STDDEV_POP'; +PERCENTILE: 'PERCENTILE'; +PERCENTILE_APPROX: 'PERCENTILE_APPROX'; +TAKE: 'TAKE'; +FIRST: 'FIRST'; +LAST: 'LAST'; +LIST: 'LIST'; +VALUES: 'VALUES'; +EARLIEST: 'EARLIEST'; +EARLIEST_TIME: 'EARLIEST_TIME'; +LATEST: 'LATEST'; +LATEST_TIME: 'LATEST_TIME'; +PER_DAY: 'PER_DAY'; +PER_HOUR: 'PER_HOUR'; +PER_MINUTE: 'PER_MINUTE'; +PER_SECOND: 'PER_SECOND'; +RATE: 'RATE'; +SPARKLINE: 'SPARKLINE'; +C: 'C'; +DC: 'DC'; + +// BASIC FUNCTIONS +ABS: 'ABS'; +CBRT: 'CBRT'; +CEIL: 'CEIL'; +CEILING: 'CEILING'; +CONV: 'CONV'; +CRC32: 'CRC32'; +E: 'E'; +EXP: 'EXP'; +FLOOR: 'FLOOR'; +LN: 'LN'; +LOG: 'LOG'; +LOG10: 'LOG10'; +LOG2: 'LOG2'; +MOD: 'MOD'; +PI: 'PI'; +POSITION: 'POSITION'; +POW: 'POW'; +POWER: 'POWER'; +RAND: 'RAND'; +ROUND: 'ROUND'; +SIGN: 'SIGN'; +SIGNUM: 'SIGNUM'; +SQRT: 'SQRT'; 
+TRUNCATE: 'TRUNCATE'; + +// TRIGONOMETRIC FUNCTIONS +ACOS: 'ACOS'; +ASIN: 'ASIN'; +ATAN: 'ATAN'; +ATAN2: 'ATAN2'; +COS: 'COS'; +COT: 'COT'; +DEGREES: 'DEGREES'; +RADIANS: 'RADIANS'; +SIN: 'SIN'; +TAN: 'TAN'; + +// CRYPTOGRAPHIC FUNCTIONS +MD5: 'MD5'; +SHA1: 'SHA1'; +SHA2: 'SHA2'; + +// DATE AND TIME FUNCTIONS +ADDDATE: 'ADDDATE'; +ADDTIME: 'ADDTIME'; +CURDATE: 'CURDATE'; +CURRENT_DATE: 'CURRENT_DATE'; +CURRENT_TIME: 'CURRENT_TIME'; +CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; +CURRENT_TIMEZONE: 'CURRENT_TIMEZONE'; +CURTIME: 'CURTIME'; +DATE: 'DATE'; +DATEDIFF: 'DATEDIFF'; +DATE_ADD: 'DATE_ADD'; +DATE_FORMAT: 'DATE_FORMAT'; +DATE_SUB: 'DATE_SUB'; +DAYNAME: 'DAYNAME'; +DAYOFMONTH: 'DAYOFMONTH'; +DAYOFWEEK: 'DAYOFWEEK'; +DAYOFYEAR: 'DAYOFYEAR'; +DAY_OF_MONTH: 'DAY_OF_MONTH'; +DAY_OF_WEEK: 'DAY_OF_WEEK'; +DURATION: 'DURATION'; +EXTRACT: 'EXTRACT'; +FROM_DAYS: 'FROM_DAYS'; +FROM_UNIXTIME: 'FROM_UNIXTIME'; +GET_FORMAT: 'GET_FORMAT'; +LAST_DAY: 'LAST_DAY'; +LOCALTIME: 'LOCALTIME'; +LOCALTIMESTAMP: 'LOCALTIMESTAMP'; +MAKEDATE: 'MAKEDATE'; +MAKE_DATE: 'MAKE_DATE'; +MAKETIME: 'MAKETIME'; +MONTHNAME: 'MONTHNAME'; +NOW: 'NOW'; +PERIOD_ADD: 'PERIOD_ADD'; +PERIOD_DIFF: 'PERIOD_DIFF'; +SEC_TO_TIME: 'SEC_TO_TIME'; +STR_TO_DATE: 'STR_TO_DATE'; +SUBDATE: 'SUBDATE'; +SUBTIME: 'SUBTIME'; +SYSDATE: 'SYSDATE'; +TIME: 'TIME'; +TIMEDIFF: 'TIMEDIFF'; +TIMESTAMP: 'TIMESTAMP'; +TIMESTAMPADD: 'TIMESTAMPADD'; +TIMESTAMPDIFF: 'TIMESTAMPDIFF'; +TIME_FORMAT: 'TIME_FORMAT'; +TIME_TO_SEC: 'TIME_TO_SEC'; +TO_DAYS: 'TO_DAYS'; +TO_SECONDS: 'TO_SECONDS'; +UNIX_TIMESTAMP: 'UNIX_TIMESTAMP'; +UTC_DATE: 'UTC_DATE'; +UTC_TIME: 'UTC_TIME'; +UTC_TIMESTAMP: 'UTC_TIMESTAMP'; +WEEKDAY: 'WEEKDAY'; +YEARWEEK: 'YEARWEEK'; + +// TEXT FUNCTIONS +SUBSTR: 'SUBSTR'; +SUBSTRING: 'SUBSTRING'; +LTRIM: 'LTRIM'; +RTRIM: 'RTRIM'; +TRIM: 'TRIM'; +TO: 'TO'; +LOWER: 'LOWER'; +UPPER: 'UPPER'; +CONCAT: 'CONCAT'; +CONCAT_WS: 'CONCAT_WS'; +LENGTH: 'LENGTH'; +STRCMP: 'STRCMP'; +RIGHT: 'RIGHT'; +LEFT: 'LEFT'; +ASCII: 'ASCII'; +LOCATE: 
'LOCATE'; +REPLACE: 'REPLACE'; +REVERSE: 'REVERSE'; +CAST: 'CAST'; +ISEMPTY: 'ISEMPTY'; +ISBLANK: 'ISBLANK'; + +// JSON TEXT FUNCTIONS +JSON: 'JSON'; +JSON_OBJECT: 'JSON_OBJECT'; +JSON_ARRAY: 'JSON_ARRAY'; +JSON_ARRAY_LENGTH: 'JSON_ARRAY_LENGTH'; +TO_JSON_STRING: 'TO_JSON_STRING'; +JSON_EXTRACT: 'JSON_EXTRACT'; +JSON_KEYS: 'JSON_KEYS'; +JSON_VALID: 'JSON_VALID'; +//JSON_APPEND: 'JSON_APPEND'; +//JSON_DELETE: 'JSON_DELETE'; +//JSON_EXTEND: 'JSON_EXTEND'; +//JSON_SET: 'JSON_SET'; +//JSON_ARRAY_ALL_MATCH: 'JSON_ARRAY_ALL_MATCH'; +//JSON_ARRAY_ANY_MATCH: 'JSON_ARRAY_ANY_MATCH'; +//JSON_ARRAY_FILTER: 'JSON_ARRAY_FILTER'; +//JSON_ARRAY_MAP: 'JSON_ARRAY_MAP'; +//JSON_ARRAY_REDUCE: 'JSON_ARRAY_REDUCE'; + +// COLLECTION FUNCTIONS +ARRAY: 'ARRAY'; +ARRAY_LENGTH: 'ARRAY_LENGTH'; + +// LAMBDA FUNCTIONS +//EXISTS: 'EXISTS'; +FORALL: 'FORALL'; +FILTER: 'FILTER'; +TRANSFORM: 'TRANSFORM'; +REDUCE: 'REDUCE'; + +// BOOL FUNCTIONS +LIKE: 'LIKE'; +ISNULL: 'ISNULL'; +ISNOTNULL: 'ISNOTNULL'; +ISPRESENT: 'ISPRESENT'; +BETWEEN: 'BETWEEN'; +CIDRMATCH: 'CIDRMATCH'; + +// FLOWCONTROL FUNCTIONS +IFNULL: 'IFNULL'; +NULLIF: 'NULLIF'; +IF: 'IF'; +TYPEOF: 'TYPEOF'; + +//OTHER CONDITIONAL EXPRESSIONS +COALESCE: 'COALESCE'; + +// RELEVANCE FUNCTIONS AND PARAMETERS +MATCH: 'MATCH'; +MATCH_PHRASE: 'MATCH_PHRASE'; +MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX'; +MATCH_BOOL_PREFIX: 'MATCH_BOOL_PREFIX'; +SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING'; +MULTI_MATCH: 'MULTI_MATCH'; +QUERY_STRING: 'QUERY_STRING'; + +ALLOW_LEADING_WILDCARD: 'ALLOW_LEADING_WILDCARD'; +ANALYZE_WILDCARD: 'ANALYZE_WILDCARD'; +ANALYZER: 'ANALYZER'; +AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'; +BOOST: 'BOOST'; +CUTOFF_FREQUENCY: 'CUTOFF_FREQUENCY'; +DEFAULT_FIELD: 'DEFAULT_FIELD'; +DEFAULT_OPERATOR: 'DEFAULT_OPERATOR'; +ENABLE_POSITION_INCREMENTS: 'ENABLE_POSITION_INCREMENTS'; +ESCAPE: 'ESCAPE'; +FLAGS: 'FLAGS'; +FUZZY_MAX_EXPANSIONS: 'FUZZY_MAX_EXPANSIONS'; +FUZZY_PREFIX_LENGTH: 'FUZZY_PREFIX_LENGTH'; 
+FUZZY_TRANSPOSITIONS: 'FUZZY_TRANSPOSITIONS'; +FUZZY_REWRITE: 'FUZZY_REWRITE'; +FUZZINESS: 'FUZZINESS'; +LENIENT: 'LENIENT'; +LOW_FREQ_OPERATOR: 'LOW_FREQ_OPERATOR'; +MAX_DETERMINIZED_STATES: 'MAX_DETERMINIZED_STATES'; +MAX_EXPANSIONS: 'MAX_EXPANSIONS'; +MINIMUM_SHOULD_MATCH: 'MINIMUM_SHOULD_MATCH'; +OPERATOR: 'OPERATOR'; +PHRASE_SLOP: 'PHRASE_SLOP'; +PREFIX_LENGTH: 'PREFIX_LENGTH'; +QUOTE_ANALYZER: 'QUOTE_ANALYZER'; +QUOTE_FIELD_SUFFIX: 'QUOTE_FIELD_SUFFIX'; +REWRITE: 'REWRITE'; +SLOP: 'SLOP'; +TIE_BREAKER: 'TIE_BREAKER'; +TYPE: 'TYPE'; +ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY'; + +// SPAN KEYWORDS +SPAN: 'SPAN'; +MS: 'MS'; +S: 'S'; +M: 'M'; +H: 'H'; +W: 'W'; +Q: 'Q'; +Y: 'Y'; + + +// LITERALS AND VALUES +//STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; +ID: ID_LITERAL; +CLUSTER: CLUSTER_PREFIX_LITERAL; +INTEGER_LITERAL: DEC_DIGIT+; +DECIMAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+; + +fragment DATE_SUFFIX: ([\-.][*0-9]+)+; +fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*; +fragment CLUSTER_PREFIX_LITERAL: [*A-Z]+?[*A-Z_\-0-9]* COLON; +ID_DATE_SUFFIX: CLUSTER_PREFIX_LITERAL? ID_LITERAL DATE_SUFFIX; +DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; +SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; +BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; +fragment DEC_DIGIT: [0-9]; + +LINE_COMMENT: '//' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN); +BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN); + +ERROR_RECOGNITION: . -> channel(ERRORCHANNEL); diff --git a/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 b/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 new file mode 100644 index 0000000000..133cf64be5 --- /dev/null +++ b/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 @@ -0,0 +1,1174 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +parser grammar OpenSearchPPLParser; + + +options { tokenVocab = OpenSearchPPLLexer; } +root + : pplStatement? 
EOF + ; + +// statement +pplStatement + : dmlStatement + ; + +dmlStatement + : (explainCommand PIPE)? queryStatement + ; + +queryStatement + : pplCommands (PIPE commands)* + ; + +subSearch + : searchCommand (PIPE commands)* + ; + +// commands +pplCommands + : searchCommand + | describeCommand + ; + +commands + : whereCommand + | correlateCommand + | joinCommand + | fieldsCommand + | statsCommand + | dedupCommand + | sortCommand + | headCommand + | topCommand + | rareCommand + | evalCommand + | grokCommand + | parseCommand + | patternsCommand + | lookupCommand + | renameCommand + | fillnullCommand + | fieldsummaryCommand + | flattenCommand + | expandCommand + | trendlineCommand + ; + +commandName + : SEARCH + | DESCRIBE + | SHOW + | AD + | ML + | KMEANS + | WHERE + | CORRELATE + | JOIN + | FIELDS + | STATS + | EVENTSTATS + | DEDUP + | EXPLAIN + | SORT + | HEAD + | TOP + | RARE + | EVAL + | GROK + | PARSE + | PATTERNS + | LOOKUP + | RENAME + | EXPAND + | FILLNULL + | FIELDSUMMARY + | FLATTEN + | TRENDLINE + ; + +searchCommand + : (SEARCH)? fromClause # searchFrom + | (SEARCH)? fromClause logicalExpression # searchFromFilter + | (SEARCH)? logicalExpression fromClause # searchFilterFrom + ; + +fieldsummaryCommand + : FIELDSUMMARY (fieldsummaryParameter)* + ; + +fieldsummaryParameter + : INCLUDEFIELDS EQUAL fieldList # fieldsummaryIncludeFields + | NULLS EQUAL booleanLiteral # fieldsummaryNulls + ; + +describeCommand + : DESCRIBE tableSourceClause + ; + +explainCommand + : EXPLAIN explainMode + ; + +explainMode + : FORMATTED + | COST + | CODEGEN + | EXTENDED + | SIMPLE + ; + +showDataSourcesCommand + : SHOW DATASOURCES + ; + +whereCommand + : WHERE logicalExpression + ; + +correlateCommand + : CORRELATE correlationType FIELDS LT_PRTHS fieldList RT_PRTHS (scopeClause)? mappingList + ; + +correlationType + : SELF + | EXACT + | APPROXIMATE + ; + +scopeClause + : SCOPE LT_PRTHS fieldExpression COMMA value = literalValue (unit = timespanUnit)? 
RT_PRTHS + ; + +mappingList + : MAPPING LT_PRTHS ( mappingClause (COMMA mappingClause)* ) RT_PRTHS + ; + +mappingClause + : left = qualifiedName comparisonOperator right = qualifiedName # mappingCompareExpr + ; + +fieldsCommand + : FIELDS (PLUS | MINUS)? fieldList + ; + +renameCommand + : RENAME renameClasue (COMMA renameClasue)* + ; + +statsCommand + : (STATS | EVENTSTATS) (PARTITIONS EQUAL partitions = integerLiteral)? (ALLNUM EQUAL allnum = booleanLiteral)? (DELIM EQUAL delim = stringLiteral)? statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (DEDUP_SPLITVALUES EQUAL dedupsplit = booleanLiteral)? + ; + +dedupCommand + : DEDUP (number = integerLiteral)? fieldList (KEEPEMPTY EQUAL keepempty = booleanLiteral)? (CONSECUTIVE EQUAL consecutive = booleanLiteral)? + ; + +sortCommand + : SORT sortbyClause + ; + +evalCommand + : EVAL evalClause (COMMA evalClause)* + ; + +headCommand + : HEAD (number = integerLiteral)? (FROM from = integerLiteral)? + ; + +topCommand + : TOP (number = integerLiteral)? fieldList (byClause)? + ; + +rareCommand + : RARE fieldList (byClause)? + ; + +grokCommand + : GROK (source_field = expression) (pattern = stringLiteral) + ; + +parseCommand + : PARSE (source_field = expression) (pattern = stringLiteral) + ; + +patternsCommand + : PATTERNS (patternsParameter)* (source_field = expression) + ; + +patternsParameter + : (NEW_FIELD EQUAL new_field = stringLiteral) + | (PATTERN EQUAL pattern = stringLiteral) + ; + +patternsMethod + : PUNCT + | REGEX + ; + +// lookup +lookupCommand + : LOOKUP tableSource lookupMappingList ((APPEND | REPLACE) outputCandidateList)? + ; + +lookupMappingList + : lookupPair (COMMA lookupPair)* + ; + +outputCandidateList + : lookupPair (COMMA lookupPair)* + ; + + // The lookup pair will generate a K-V pair. + // The format is Key -> Alias(outputFieldName, inputField), Value -> outputField. For example: + // 1. 
When lookupPair is "name AS cName", the key will be Alias(cName, Field(name)), the value will be Field(cName) + // 2. When lookupPair is "dept", the key is Alias(dept, Field(dept)), value is Field(dept) +lookupPair + : inputField = fieldExpression (AS outputField = fieldExpression)? + ; + +fillnullCommand + : FILLNULL (fillNullWithTheSameValue + | fillNullWithFieldVariousValues) + ; + + fillNullWithTheSameValue + : WITH nullReplacement IN nullableField (COMMA nullableField)* + ; + + fillNullWithFieldVariousValues + : USING nullableField EQUAL nullReplacement (COMMA nullableField EQUAL nullReplacement)* + ; + + + nullableField + : fieldExpression + ; + + nullReplacement + : expression + ; + +expandCommand + : EXPAND fieldExpression (AS alias = qualifiedName)? + ; + +flattenCommand + : FLATTEN fieldExpression (AS alias = identifierSeq)? + ; + +trendlineCommand + : TRENDLINE (SORT sortField)? trendlineClause (trendlineClause)* + ; + +trendlineClause + : trendlineType LT_PRTHS numberOfDataPoints = INTEGER_LITERAL COMMA field = fieldExpression RT_PRTHS (AS alias = qualifiedName)? 
+ ; + +trendlineType + : SMA + | WMA + ; + +kmeansCommand + : KMEANS (kmeansParameter)* + ; + +kmeansParameter + : (CENTROIDS EQUAL centroids = integerLiteral) + | (ITERATIONS EQUAL iterations = integerLiteral) + | (DISTANCE_TYPE EQUAL distance_type = stringLiteral) + ; + +adCommand + : AD (adParameter)* + ; + +adParameter + : (NUMBER_OF_TREES EQUAL number_of_trees = integerLiteral) + | (SHINGLE_SIZE EQUAL shingle_size = integerLiteral) + | (SAMPLE_SIZE EQUAL sample_size = integerLiteral) + | (OUTPUT_AFTER EQUAL output_after = integerLiteral) + | (TIME_DECAY EQUAL time_decay = decimalLiteral) + | (ANOMALY_RATE EQUAL anomaly_rate = decimalLiteral) + | (CATEGORY_FIELD EQUAL category_field = stringLiteral) + | (TIME_FIELD EQUAL time_field = stringLiteral) + | (DATE_FORMAT EQUAL date_format = stringLiteral) + | (TIME_ZONE EQUAL time_zone = stringLiteral) + | (TRAINING_DATA_SIZE EQUAL training_data_size = integerLiteral) + | (ANOMALY_SCORE_THRESHOLD EQUAL anomaly_score_threshold = decimalLiteral) + ; + +mlCommand + : ML (mlArg)* + ; + +mlArg + : (argName = ident EQUAL argValue = literalValue) + ; + +// clauses +fromClause + : SOURCE EQUAL tableOrSubqueryClause + | INDEX EQUAL tableOrSubqueryClause + ; + +tableOrSubqueryClause + : LT_SQR_PRTHS subSearch RT_SQR_PRTHS (AS alias = qualifiedName)? + | tableSourceClause + ; + +// One tableSourceClause will generate one Relation node with/without one alias +// even if the relation contains more than one table source. +// These table sources in one relation will be read one by one in OpenSearch. +// But it may have different behaviours in different execution backends. +// For example, a Spark UnresolvedRelation node only accepts one data source. +tableSourceClause + : tableSource (COMMA tableSource)* (AS alias = qualifiedName)? + ; + +// join +joinCommand + : (joinType) JOIN sideAlias joinHintList? joinCriteria? right = tableOrSubqueryClause + ; + +joinType + : INNER? + | CROSS + | LEFT OUTER? + | RIGHT OUTER? 
+ | FULL OUTER? + | LEFT? SEMI + | LEFT? ANTI + ; + +sideAlias + : (LEFT EQUAL leftAlias = ident)? COMMA? (RIGHT EQUAL rightAlias = ident)? + ; + +joinCriteria + : ON logicalExpression + ; + +joinHintList + : hintPair (COMMA? hintPair)* + ; + +hintPair + : leftHintKey = LEFT_HINT DOT ID EQUAL leftHintValue = ident #leftHint + | rightHintKey = RIGHT_HINT DOT ID EQUAL rightHintValue = ident #rightHint + ; + +renameClasue + : orignalField = wcFieldExpression AS renamedField = wcFieldExpression + ; + +byClause + : BY fieldList + ; + +statsByClause + : BY fieldList + | BY bySpanClause + | BY bySpanClause COMMA fieldList + ; + +bySpanClause + : spanClause (AS alias = qualifiedName)? + ; + +spanClause + : SPAN LT_PRTHS fieldExpression COMMA value = literalValue (unit = timespanUnit)? RT_PRTHS + ; + +sortbyClause + : sortField (COMMA sortField)* + ; + +evalClause + : fieldExpression EQUAL expression + ; + +// aggregation terms +statsAggTerm + : statsFunction (AS alias = wcFieldExpression)? + ; + +// aggregation functions +statsFunction + : statsFunctionName LT_PRTHS valueExpression RT_PRTHS # statsFunctionCall + | COUNT LT_PRTHS RT_PRTHS # countAllFunctionCall + | (DISTINCT_COUNT | DC) LT_PRTHS valueExpression RT_PRTHS # distinctCountFunctionCall + | percentileFunctionName = (PERCENTILE | PERCENTILE_APPROX) LT_PRTHS valueExpression COMMA percent = integerLiteral RT_PRTHS # percentileFunctionCall + ; + +statsFunctionName + : AVG + | COUNT + | SUM + | MIN + | MAX + | STDDEV_SAMP + | STDDEV_POP + ; + +// expressions +expression + : logicalExpression + | valueExpression + ; + +logicalExpression + : NOT logicalExpression # logicalNot + | LT_PRTHS logicalExpression RT_PRTHS # parentheticLogicalExpr + | comparisonExpression # comparsion + | left = logicalExpression (AND)? 
right = logicalExpression # logicalAnd + | left = logicalExpression OR right = logicalExpression # logicalOr + | left = logicalExpression XOR right = logicalExpression # logicalXor + | booleanExpression # booleanExpr + ; + +comparisonExpression + : left = valueExpression comparisonOperator right = valueExpression # compareExpr + | valueExpression NOT? IN valueList # inExpr + | expr1 = functionArg NOT? BETWEEN expr2 = functionArg AND expr3 = functionArg # between + ; + +valueExpressionList + : valueExpression + | LT_PRTHS valueExpression (COMMA valueExpression)* RT_PRTHS + ; + +valueExpression + : left = valueExpression binaryOperator = (STAR | DIVIDE | MODULE) right = valueExpression # binaryArithmetic + | left = valueExpression binaryOperator = (PLUS | MINUS) right = valueExpression # binaryArithmetic + | primaryExpression # valueExpressionDefault + | positionFunction # positionFunctionCall + | caseFunction # caseExpr + | timestampFunction # timestampFunctionCall + | LT_PRTHS valueExpression RT_PRTHS # parentheticValueExpr + | LT_SQR_PRTHS subSearch RT_SQR_PRTHS # scalarSubqueryExpr + | ident ARROW expression # lambda + | LT_PRTHS ident (COMMA ident)+ RT_PRTHS ARROW expression # lambda + ; + +primaryExpression + : evalFunctionCall + | fieldExpression + | literalValue + ; + +positionFunction + : positionFunctionName LT_PRTHS functionArg IN functionArg RT_PRTHS + ; + +booleanExpression + : booleanFunctionCall # booleanFunctionCallExpr + | isEmptyExpression # isEmptyExpr + | valueExpressionList NOT? IN LT_SQR_PRTHS subSearch RT_SQR_PRTHS # inSubqueryExpr + | EXISTS LT_SQR_PRTHS subSearch RT_SQR_PRTHS # existsSubqueryExpr + | cidrMatchFunctionCall # cidrFunctionCallExpr + ; + + isEmptyExpression + : (ISEMPTY | ISBLANK) LT_PRTHS functionArg RT_PRTHS + ; + + caseFunction + : CASE LT_PRTHS logicalExpression COMMA valueExpression (COMMA logicalExpression COMMA valueExpression)* (ELSE valueExpression)? 
RT_PRTHS + ; + +relevanceExpression + : singleFieldRelevanceFunction + | multiFieldRelevanceFunction + ; + +// Field is a single column +singleFieldRelevanceFunction + : singleFieldRelevanceFunctionName LT_PRTHS field = relevanceField COMMA query = relevanceQuery (COMMA relevanceArg)* RT_PRTHS + ; + +// Field is a list of columns +multiFieldRelevanceFunction + : multiFieldRelevanceFunctionName LT_PRTHS LT_SQR_PRTHS field = relevanceFieldAndWeight (COMMA field = relevanceFieldAndWeight)* RT_SQR_PRTHS COMMA query = relevanceQuery (COMMA relevanceArg)* RT_PRTHS + ; + +// tables +tableSource + : tableQualifiedName + | ID_DATE_SUFFIX + ; + +tableFunction + : qualifiedName LT_PRTHS functionArgs RT_PRTHS + ; + +// fields +fieldList + : fieldExpression (COMMA fieldExpression)* + ; + +wcFieldList + : wcFieldExpression (COMMA wcFieldExpression)* + ; + +sortField + : (PLUS | MINUS)? sortFieldExpression + ; + +sortFieldExpression + : fieldExpression + | AUTO LT_PRTHS fieldExpression RT_PRTHS + | STR LT_PRTHS fieldExpression RT_PRTHS + | IP LT_PRTHS fieldExpression RT_PRTHS + | NUM LT_PRTHS fieldExpression RT_PRTHS + ; + +fieldExpression + : qualifiedName + ; + +wcFieldExpression + : wcQualifiedName + ; + +// functions +evalFunctionCall + : evalFunctionName LT_PRTHS functionArgs RT_PRTHS + ; + +// cast function +dataTypeFunctionCall + : CAST LT_PRTHS expression AS convertedDataType RT_PRTHS + ; + +// boolean functions +booleanFunctionCall + : conditionFunctionBase LT_PRTHS functionArgs RT_PRTHS + ; + +cidrMatchFunctionCall + : CIDRMATCH LT_PRTHS ipAddress = functionArg COMMA cidrBlock = functionArg RT_PRTHS + ; + +convertedDataType + : typeName = DATE + | typeName = TIME + | typeName = TIMESTAMP + | typeName = INT + | typeName = INTEGER + | typeName = DOUBLE + | typeName = LONG + | typeName = FLOAT + | typeName = STRING + | typeName = BOOLEAN + ; + +evalFunctionName + : mathematicalFunctionName + | dateTimeFunctionName + | textFunctionName + | conditionFunctionBase + | 
systemFunctionName + | positionFunctionName + | coalesceFunctionName + | cryptographicFunctionName + | jsonFunctionName + | collectionFunctionName + | lambdaFunctionName + ; + +functionArgs + : (functionArg (COMMA functionArg)*)? + ; + +functionArg + : (ident EQUAL)? valueExpression + ; + +relevanceArg + : relevanceArgName EQUAL relevanceArgValue + ; + +relevanceArgName + : ALLOW_LEADING_WILDCARD + | ANALYZER + | ANALYZE_WILDCARD + | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY + | BOOST + | CUTOFF_FREQUENCY + | DEFAULT_FIELD + | DEFAULT_OPERATOR + | ENABLE_POSITION_INCREMENTS + | ESCAPE + | FIELDS + | FLAGS + | FUZZINESS + | FUZZY_MAX_EXPANSIONS + | FUZZY_PREFIX_LENGTH + | FUZZY_REWRITE + | FUZZY_TRANSPOSITIONS + | LENIENT + | LOW_FREQ_OPERATOR + | MAX_DETERMINIZED_STATES + | MAX_EXPANSIONS + | MINIMUM_SHOULD_MATCH + | OPERATOR + | PHRASE_SLOP + | PREFIX_LENGTH + | QUOTE_ANALYZER + | QUOTE_FIELD_SUFFIX + | REWRITE + | SLOP + | TIE_BREAKER + | TIME_ZONE + | TYPE + | ZERO_TERMS_QUERY + ; + +relevanceFieldAndWeight + : field = relevanceField + | field = relevanceField weight = relevanceFieldWeight + | field = relevanceField BIT_XOR_OP weight = relevanceFieldWeight + ; + +relevanceFieldWeight + : integerLiteral + | decimalLiteral + ; + +relevanceField + : qualifiedName + | stringLiteral + ; + +relevanceQuery + : relevanceArgValue + ; + +relevanceArgValue + : qualifiedName + | literalValue + ; + +mathematicalFunctionName + : ABS + | CBRT + | CEIL + | CEILING + | CONV + | CRC32 + | E + | EXP + | FLOOR + | LN + | LOG + | LOG10 + | LOG2 + | MOD + | PI + | POW + | POWER + | RAND + | ROUND + | SIGN + | SIGNUM + | SQRT + | TRUNCATE + | trigonometricFunctionName + ; + +trigonometricFunctionName + : ACOS + | ASIN + | ATAN + | ATAN2 + | COS + | COT + | DEGREES + | RADIANS + | SIN + | TAN + ; + +cryptographicFunctionName + : MD5 + | SHA1 + | SHA2 + ; + +dateTimeFunctionName + : ADDDATE + | ADDTIME + | CONVERT_TZ + | CURDATE + | CURRENT_DATE + | CURRENT_TIME + | CURRENT_TIMESTAMP + | 
CURRENT_TIMEZONE + | CURTIME + | DATE + | DATEDIFF + | DATETIME + | DATE_ADD + | DATE_FORMAT + | DATE_SUB + | DAY + | DAYNAME + | DAYOFMONTH + | DAYOFWEEK + | DAYOFYEAR + | DAY_OF_MONTH + | DAY_OF_WEEK + | DAY_OF_YEAR + | FROM_DAYS + | FROM_UNIXTIME + | HOUR + | HOUR_OF_DAY + | LAST_DAY + | LOCALTIME + | LOCALTIMESTAMP + | MAKEDATE + | MAKE_DATE + | MAKETIME + | MICROSECOND + | MINUTE + | MINUTE_OF_DAY + | MINUTE_OF_HOUR + | MONTH + | MONTHNAME + | MONTH_OF_YEAR + | NOW + | PERIOD_ADD + | PERIOD_DIFF + | QUARTER + | SECOND + | SECOND_OF_MINUTE + | SEC_TO_TIME + | STR_TO_DATE + | SUBDATE + | SUBTIME + | SYSDATE + | TIME + | TIMEDIFF + | TIMESTAMP + | TIME_FORMAT + | TIME_TO_SEC + | TO_DAYS + | TO_SECONDS + | UNIX_TIMESTAMP + | UTC_DATE + | UTC_TIME + | UTC_TIMESTAMP + | WEEK + | WEEKDAY + | WEEK_OF_YEAR + | YEAR + | YEARWEEK + ; + +getFormatFunction + : GET_FORMAT LT_PRTHS getFormatType COMMA functionArg RT_PRTHS + ; + +getFormatType + : DATE + | DATETIME + | TIME + | TIMESTAMP + ; + +extractFunction + : EXTRACT LT_PRTHS datetimePart FROM functionArg RT_PRTHS + ; + +simpleDateTimePart + : MICROSECOND + | SECOND + | MINUTE + | HOUR + | DAY + | WEEK + | MONTH + | QUARTER + | YEAR + ; + +complexDateTimePart + : SECOND_MICROSECOND + | MINUTE_MICROSECOND + | MINUTE_SECOND + | HOUR_MICROSECOND + | HOUR_SECOND + | HOUR_MINUTE + | DAY_MICROSECOND + | DAY_SECOND + | DAY_MINUTE + | DAY_HOUR + | YEAR_MONTH + ; + +datetimePart + : simpleDateTimePart + | complexDateTimePart + ; + +timestampFunction + : timestampFunctionName LT_PRTHS simpleDateTimePart COMMA firstArg = functionArg COMMA secondArg = functionArg RT_PRTHS + ; + +timestampFunctionName + : TIMESTAMPADD + | TIMESTAMPDIFF + ; + +// condition function return boolean value +conditionFunctionBase + : LIKE + | IF + | ISNULL + | ISNOTNULL + | IFNULL + | NULLIF + | ISPRESENT + | JSON_VALID + ; + +systemFunctionName + : TYPEOF + ; + +textFunctionName + : SUBSTR + | SUBSTRING + | TRIM + | LTRIM + | RTRIM + | LOWER + | UPPER + | 
CONCAT + | CONCAT_WS + | LENGTH + | STRCMP + | RIGHT + | LEFT + | ASCII + | LOCATE + | REPLACE + | REVERSE + | ISEMPTY + | ISBLANK + ; + +jsonFunctionName + : JSON + | JSON_OBJECT + | JSON_ARRAY + | JSON_ARRAY_LENGTH + | TO_JSON_STRING + | JSON_EXTRACT + | JSON_KEYS + | JSON_VALID +// | JSON_APPEND +// | JSON_DELETE +// | JSON_EXTEND +// | JSON_SET +// | JSON_ARRAY_ALL_MATCH +// | JSON_ARRAY_ANY_MATCH +// | JSON_ARRAY_FILTER +// | JSON_ARRAY_MAP +// | JSON_ARRAY_REDUCE + ; + +collectionFunctionName + : ARRAY + | ARRAY_LENGTH + ; + +lambdaFunctionName + : FORALL + | EXISTS + | FILTER + | TRANSFORM + | REDUCE + ; + +positionFunctionName + : POSITION + ; + +coalesceFunctionName + : COALESCE + ; + +// operators + comparisonOperator + : EQUAL + | NOT_EQUAL + | LESS + | NOT_LESS + | GREATER + | NOT_GREATER + | REGEXP + ; + +singleFieldRelevanceFunctionName + : MATCH + | MATCH_PHRASE + | MATCH_BOOL_PREFIX + | MATCH_PHRASE_PREFIX + ; + +multiFieldRelevanceFunctionName + : SIMPLE_QUERY_STRING + | MULTI_MATCH + | QUERY_STRING + ; + +// literals and values +literalValue + : stringLiteral + | integerLiteral + | decimalLiteral + | booleanLiteral + | datetimeLiteral //#datetime + | intervalLiteral + ; + +intervalLiteral + : INTERVAL valueExpression intervalUnit + ; + +stringLiteral + : DQUOTA_STRING + | SQUOTA_STRING + ; + +integerLiteral + : (PLUS | MINUS)? INTEGER_LITERAL + ; + +decimalLiteral + : (PLUS | MINUS)? 
DECIMAL_LITERAL + ; + +booleanLiteral + : TRUE + | FALSE + ; + +// Date and Time Literal, follow ANSI 92 +datetimeLiteral + : dateLiteral + | timeLiteral + | timestampLiteral + ; + +dateLiteral + : DATE date = stringLiteral + ; + +timeLiteral + : TIME time = stringLiteral + ; + +timestampLiteral + : TIMESTAMP timestamp = stringLiteral + ; + +intervalUnit + : MICROSECOND + | SECOND + | MINUTE + | HOUR + | DAY + | WEEK + | MONTH + | QUARTER + | YEAR + | SECOND_MICROSECOND + | MINUTE_MICROSECOND + | MINUTE_SECOND + | HOUR_MICROSECOND + | HOUR_SECOND + | HOUR_MINUTE + | DAY_MICROSECOND + | DAY_SECOND + | DAY_MINUTE + | DAY_HOUR + | YEAR_MONTH + ; + +timespanUnit + : MS + | S + | M + | H + | D + | W + | Q + | Y + | MILLISECOND + | SECOND + | MINUTE + | HOUR + | DAY + | WEEK + | MONTH + | QUARTER + | YEAR + ; + +valueList + : LT_PRTHS literalValue (COMMA literalValue)* RT_PRTHS + ; + +qualifiedName + : ident (DOT ident)* # identsAsQualifiedName + ; + +identifierSeq + : qualifiedName (COMMA qualifiedName)* # identsAsQualifiedNameSeq + | LT_PRTHS qualifiedName (COMMA qualifiedName)* RT_PRTHS # identsAsQualifiedNameSeq + ; + +tableQualifiedName + : tableIdent (DOT ident)* # identsAsTableQualifiedName + ; + +wcQualifiedName + : wildcard (DOT wildcard)* # identsAsWildcardQualifiedName + ; + +ident + : (DOT)? ID + | BACKTICK ident BACKTICK + | BQUOTA_STRING + | keywordsCanBeId + ; + +tableIdent + : (CLUSTER)? ident + ; + +wildcard + : ident (MODULE ident)* (MODULE)? 
+ | SINGLE_QUOTE wildcard SINGLE_QUOTE + | DOUBLE_QUOTE wildcard DOUBLE_QUOTE + | BACKTICK wildcard BACKTICK + ; + +keywordsCanBeId + : D // OD SQL and ODBC special + | timespanUnit + | SPAN + | evalFunctionName + | relevanceArgName + | intervalUnit + | dateTimeFunctionName + | textFunctionName + | jsonFunctionName + | mathematicalFunctionName + | positionFunctionName + | cryptographicFunctionName + | singleFieldRelevanceFunctionName + | multiFieldRelevanceFunctionName + | commandName + | comparisonOperator + | explainMode + | correlationType + // commands assist keywords + | IN + | SOURCE + | INDEX + | DESC + | DATASOURCES + | AUTO + | STR + | IP + | NUM + | FROM + | PATTERN + | NEW_FIELD + | SCOPE + | MAPPING + | WITH + | USING + | CAST + | GET_FORMAT + | EXTRACT + | INTERVAL + | PLUS + | MINUS + | INCLUDEFIELDS + | NULLS + // ARGUMENT KEYWORDS + | KEEPEMPTY + | CONSECUTIVE + | DEDUP_SPLITVALUES + | PARTITIONS + | ALLNUM + | DELIM + | CENTROIDS + | ITERATIONS + | DISTANCE_TYPE + | NUMBER_OF_TREES + | SHINGLE_SIZE + | SAMPLE_SIZE + | OUTPUT_AFTER + | TIME_DECAY + | ANOMALY_RATE + | CATEGORY_FIELD + | TIME_FIELD + | TIME_ZONE + | TRAINING_DATA_SIZE + | ANOMALY_SCORE_THRESHOLD + // AGGREGATIONS + | statsFunctionName + | DISTINCT_COUNT + | PERCENTILE + | PERCENTILE_APPROX + | ESTDC + | ESTDC_ERROR + | MEAN + | MEDIAN + | MODE + | RANGE + | STDEV + | STDEVP + | SUMSQ + | VAR_SAMP + | VAR_POP + | TAKE + | FIRST + | LAST + | LIST + | VALUES + | EARLIEST + | EARLIEST_TIME + | LATEST + | LATEST_TIME + | PER_DAY + | PER_HOUR + | PER_MINUTE + | PER_SECOND + | RATE + | SPARKLINE + | C + | DC + // JOIN TYPE + | OUTER + | INNER + | CROSS + | LEFT + | RIGHT + | FULL + | SEMI + | ANTI + | BETWEEN + | CIDRMATCH + | trendlineType + ; diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java index 5f88ea9ca0..a390924e85 100644 --- 
a/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java @@ -23,6 +23,7 @@ import org.opensearch.sql.spark.execution.session.SessionManager; import org.opensearch.sql.spark.rest.model.LangType; import org.opensearch.sql.spark.utils.SQLQueryUtils; +import org.opensearch.sql.spark.validator.PPLQueryValidator; import org.opensearch.sql.spark.validator.SQLQueryValidator; /** This class takes care of understanding query and dispatching job query to emr serverless. */ @@ -39,6 +40,7 @@ public class SparkQueryDispatcher { private final QueryHandlerFactory queryHandlerFactory; private final QueryIdProvider queryIdProvider; private final SQLQueryValidator sqlQueryValidator; + private final PPLQueryValidator pplQueryValidator; public DispatchQueryResponse dispatch( DispatchQueryRequest dispatchQueryRequest, @@ -47,9 +49,8 @@ public DispatchQueryResponse dispatch( this.dataSourceService.verifyDataSourceAccessAndGetRawMetadata( dispatchQueryRequest.getDatasource(), asyncQueryRequestContext); + String query = dispatchQueryRequest.getQuery(); if (LangType.SQL.equals(dispatchQueryRequest.getLangType())) { - String query = dispatchQueryRequest.getQuery(); - if (SQLQueryUtils.isFlintExtensionQuery(query)) { sqlQueryValidator.validateFlintExtensionQuery(query, dataSourceMetadata.getConnector()); return handleFlintExtensionQuery( @@ -57,6 +58,8 @@ public DispatchQueryResponse dispatch( } sqlQueryValidator.validate(query, dataSourceMetadata.getConnector()); + } else if (LangType.PPL.equals(dispatchQueryRequest.getLangType())) { + pplQueryValidator.validate(query, dataSourceMetadata.getConnector()); } return handleDefaultQuery(dispatchQueryRequest, asyncQueryRequestContext, dataSourceMetadata); } diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/CWLPPLGrammarElementValidator.java 
b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/CWLPPLGrammarElementValidator.java new file mode 100644 index 0000000000..e95c3350af --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/CWLPPLGrammarElementValidator.java @@ -0,0 +1,45 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.opensearch.sql.spark.validator.PPLGrammarElement.DESCRIBE_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.EXPAND_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.FILLNULL_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.FLATTEN_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.IPADDRESS_FUNCTIONS; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.JOIN_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.JSON_FUNCTIONS; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.LAMBDA_FUNCTIONS; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.LOOKUP_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.PATTERNS_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.SUBQUERY_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.TYPE_CONVERSION_FUNCTIONS; + +import com.google.common.collect.ImmutableSet; +import java.util.Set; + +public class CWLPPLGrammarElementValidator extends DenyListGrammarElementValidator { + private static final Set CWL_DENY_LIST = + ImmutableSet.builder() + .add( + PATTERNS_COMMAND, + JOIN_COMMAND, + LOOKUP_COMMAND, + SUBQUERY_COMMAND, + FLATTEN_COMMAND, + FILLNULL_COMMAND, + EXPAND_COMMAND, + DESCRIBE_COMMAND, + IPADDRESS_FUNCTIONS, + JSON_FUNCTIONS, + LAMBDA_FUNCTIONS, + TYPE_CONVERSION_FUNCTIONS) + .build(); + + public CWLPPLGrammarElementValidator() { + 
super(CWL_DENY_LIST); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElement.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElement.java index ab89348f33..be2394393c 100644 --- a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElement.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElement.java @@ -5,87 +5,4 @@ package org.opensearch.sql.spark.validator; -import lombok.AllArgsConstructor; - -@AllArgsConstructor -public enum GrammarElement { - ALTER_NAMESPACE("ALTER (DATABASE|TABLE|NAMESPACE)"), - ALTER_VIEW("ALTER VIEW"), - CREATE_NAMESPACE("CREATE (DATABASE|TABLE|NAMESPACE)"), - CREATE_FUNCTION("CREATE FUNCTION"), - CREATE_VIEW("CREATE VIEW"), - DROP_NAMESPACE("DROP (DATABASE|TABLE|NAMESPACE)"), - DROP_FUNCTION("DROP FUNCTION"), - DROP_VIEW("DROP VIEW"), - DROP_TABLE("DROP TABLE"), - REPAIR_TABLE("REPAIR TABLE"), - TRUNCATE_TABLE("TRUNCATE TABLE"), - // DML Statements - INSERT("INSERT"), - LOAD("LOAD"), - - // Data Retrieval Statements - EXPLAIN("EXPLAIN"), - WITH("WITH"), - CLUSTER_BY("CLUSTER BY"), - DISTRIBUTE_BY("DISTRIBUTE BY"), - // GROUP_BY("GROUP BY"), - // HAVING("HAVING"), - HINTS("HINTS"), - INLINE_TABLE("Inline Table(VALUES)"), - FILE("File"), - INNER_JOIN("INNER JOIN"), - CROSS_JOIN("CROSS JOIN"), - LEFT_OUTER_JOIN("LEFT OUTER JOIN"), - LEFT_SEMI_JOIN("LEFT SEMI JOIN"), - RIGHT_OUTER_JOIN("RIGHT OUTER JOIN"), - FULL_OUTER_JOIN("FULL OUTER JOIN"), - LEFT_ANTI_JOIN("LEFT ANTI JOIN"), - TABLESAMPLE("TABLESAMPLE"), - TABLE_VALUED_FUNCTION("Table-valued function"), - LATERAL_VIEW("LATERAL VIEW"), - LATERAL_SUBQUERY("LATERAL SUBQUERY"), - TRANSFORM("TRANSFORM"), - - // Auxiliary Statements - MANAGE_RESOURCE("Resource management statements"), - ANALYZE_TABLE("ANALYZE TABLE(S)"), - CACHE_TABLE("CACHE TABLE"), - CLEAR_CACHE("CLEAR CACHE"), - DESCRIBE_NAMESPACE("DESCRIBE (NAMESPACE|DATABASE|SCHEMA)"), - 
DESCRIBE_FUNCTION("DESCRIBE FUNCTION"), - DESCRIBE_QUERY("DESCRIBE QUERY"), - DESCRIBE_TABLE("DESCRIBE TABLE"), - REFRESH_RESOURCE("REFRESH"), - REFRESH_TABLE("REFRESH TABLE"), - REFRESH_FUNCTION("REFRESH FUNCTION"), - RESET("RESET"), - SET("SET"), - SHOW_COLUMNS("SHOW COLUMNS"), - SHOW_CREATE_TABLE("SHOW CREATE TABLE"), - SHOW_NAMESPACES("SHOW (DATABASES|SCHEMAS)"), - SHOW_FUNCTIONS("SHOW FUNCTIONS"), - SHOW_PARTITIONS("SHOW PARTITIONS"), - SHOW_TABLE_EXTENDED("SHOW TABLE EXTENDED"), - SHOW_TABLES("SHOW TABLES"), - SHOW_TBLPROPERTIES("SHOW TBLPROPERTIES"), - SHOW_VIEWS("SHOW VIEWS"), - UNCACHE_TABLE("UNCACHE TABLE"), - - // Functions - MAP_FUNCTIONS("Map functions"), - BITWISE_FUNCTIONS("Bitwise functions"), - CSV_FUNCTIONS("CSV functions"), - GENERATOR_FUNCTIONS("Generator functions"), - MISC_FUNCTIONS("Misc functions"), - - // UDF - UDF("User Defined functions"); - - String description; - - @Override - public String toString() { - return description; - } -} +public interface GrammarElement {} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLGrammarElement.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLGrammarElement.java new file mode 100644 index 0000000000..93c4ea8526 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLGrammarElement.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public enum PPLGrammarElement implements GrammarElement { + PATTERNS_COMMAND("patterns command"), + JOIN_COMMAND("join command"), + LOOKUP_COMMAND("lookup command"), + SUBQUERY_COMMAND("subquery command"), + FLATTEN_COMMAND("flatten command"), + FILLNULL_COMMAND("fillnull command"), + EXPAND_COMMAND("expand command"), + DESCRIBE_COMMAND("describe command"), + IPADDRESS_FUNCTIONS("IP address functions"), + 
JSON_FUNCTIONS("JSON functions"), + LAMBDA_FUNCTIONS("Lambda functions"), + TYPE_CONVERSION_FUNCTIONS("Type conversion functions"); + + String description; + + @Override + public String toString() { + return description; + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidationVisitor.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidationVisitor.java new file mode 100644 index 0000000000..d829dd17a5 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidationVisitor.java @@ -0,0 +1,94 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; +import org.opensearch.sql.spark.antlr.parser.OpenSearchPPLParser.*; + +/** + * Walks a PPL parse tree and throws IllegalArgumentException as soon as it reaches a grammar + * element that the supplied GrammarElementValidator reports as invalid. NOTE(review): no visit + * method here checks PPLGrammarElement.TYPE_CONVERSION_FUNCTIONS, so this visitor never rejects + * it; confirm whether that is intended. + */ +@AllArgsConstructor +public class PPLQueryValidationVisitor + extends org.opensearch.sql.spark.antlr.parser.OpenSearchPPLParserBaseVisitor<Void> { + private final GrammarElementValidator grammarElementValidator; + + @Override + public Void visitPatternsCommand(PatternsCommandContext ctx) { + validateAllowed(PPLGrammarElement.PATTERNS_COMMAND); + return super.visitPatternsCommand(ctx); + } + + @Override + public Void visitJoinCommand(JoinCommandContext ctx) { + validateAllowed(PPLGrammarElement.JOIN_COMMAND); + return super.visitJoinCommand(ctx); + } + + @Override + public Void visitLookupCommand(LookupCommandContext ctx) { + validateAllowed(PPLGrammarElement.LOOKUP_COMMAND); + return super.visitLookupCommand(ctx); + } + + @Override + public Void visitSubSearch(SubSearchContext ctx) { + validateAllowed(PPLGrammarElement.SUBQUERY_COMMAND); + return super.visitSubSearch(ctx); + } + + @Override + public Void visitFlattenCommand(FlattenCommandContext ctx) { + validateAllowed(PPLGrammarElement.FLATTEN_COMMAND); + return super.visitFlattenCommand(ctx); + } + + @Override + public Void visitFillnullCommand(FillnullCommandContext 
ctx) { + validateAllowed(PPLGrammarElement.FILLNULL_COMMAND); + return super.visitFillnullCommand(ctx); + } + + @Override + public Void visitExpandCommand(ExpandCommandContext ctx) { + validateAllowed(PPLGrammarElement.EXPAND_COMMAND); + return super.visitExpandCommand(ctx); + } + + @Override + public Void visitDescribeCommand(DescribeCommandContext ctx) { + validateAllowed(PPLGrammarElement.DESCRIBE_COMMAND); + return super.visitDescribeCommand(ctx); + } + + @Override + public Void visitCidrMatchFunctionCall(CidrMatchFunctionCallContext ctx) { + validateAllowed(PPLGrammarElement.IPADDRESS_FUNCTIONS); + return super.visitCidrMatchFunctionCall(ctx); + } + + @Override + public Void visitJsonFunctionName(JsonFunctionNameContext ctx) { + validateAllowed(PPLGrammarElement.JSON_FUNCTIONS); + return super.visitJsonFunctionName(ctx); + } + + @Override + public Void visitLambdaFunctionName(LambdaFunctionNameContext ctx) { + validateAllowed(PPLGrammarElement.LAMBDA_FUNCTIONS); + return super.visitLambdaFunctionName(ctx); + } + + /** Throws IllegalArgumentException when the validator reports the element as invalid. */ + private void validateAllowed(PPLGrammarElement element) { + if (!grammarElementValidator.isValid(element)) { + throw new IllegalArgumentException(element + " is not allowed."); + } + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidator.java new file mode 100644 index 0000000000..e630ffc45f --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidator.java @@ -0,0 +1,50 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; +import org.antlr.v4.runtime.CommonTokenStream; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; +import 
org.opensearch.sql.common.antlr.SyntaxAnalysisErrorListener; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.antlr.parser.OpenSearchPPLLexer; +import org.opensearch.sql.spark.antlr.parser.OpenSearchPPLParser; + +@AllArgsConstructor +public class PPLQueryValidator { + private static final Logger log = LogManager.getLogger(PPLQueryValidator.class); + + private final GrammarElementValidatorProvider grammarElementValidatorProvider; + + /** + * It will look up validator associated with the DataSourceType, and throw + * IllegalArgumentException if invalid grammar element is found. + * + * @param pplQuery The query to be validated + * @param datasourceType The datasource type used to look up the grammar element validator + */ + public void validate(String pplQuery, DataSourceType datasourceType) { + GrammarElementValidator grammarElementValidator = + grammarElementValidatorProvider.getValidatorForDatasource(datasourceType); + PPLQueryValidationVisitor visitor = new PPLQueryValidationVisitor(grammarElementValidator); + try { + visitor.visit(getPplParser(pplQuery).root()); + } catch (IllegalArgumentException e) { + log.error("Query validation failed. 
DataSourceType=" + datasourceType, e); + throw e; + } + } + + public static OpenSearchPPLParser getPplParser(String pplQuery) { + OpenSearchPPLParser pplParser = + new OpenSearchPPLParser( + new CommonTokenStream(new OpenSearchPPLLexer(new CaseInsensitiveCharStream(pplQuery)))); + pplParser.addErrorListener(new SyntaxAnalysisErrorListener()); + return pplParser; + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueGrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueGrammarElementValidator.java deleted file mode 100644 index 668ab26d68..0000000000 --- a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueGrammarElementValidator.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.validator; - -import static org.opensearch.sql.spark.validator.GrammarElement.ALTER_VIEW; -import static org.opensearch.sql.spark.validator.GrammarElement.BITWISE_FUNCTIONS; -import static org.opensearch.sql.spark.validator.GrammarElement.CLUSTER_BY; -import static org.opensearch.sql.spark.validator.GrammarElement.CREATE_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.CREATE_VIEW; -import static org.opensearch.sql.spark.validator.GrammarElement.CROSS_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.DESCRIBE_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.DISTRIBUTE_BY; -import static org.opensearch.sql.spark.validator.GrammarElement.DROP_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.DROP_VIEW; -import static org.opensearch.sql.spark.validator.GrammarElement.FILE; -import static org.opensearch.sql.spark.validator.GrammarElement.FULL_OUTER_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.HINTS; -import static 
org.opensearch.sql.spark.validator.GrammarElement.INLINE_TABLE; -import static org.opensearch.sql.spark.validator.GrammarElement.INSERT; -import static org.opensearch.sql.spark.validator.GrammarElement.LEFT_ANTI_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.LEFT_SEMI_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.LOAD; -import static org.opensearch.sql.spark.validator.GrammarElement.MANAGE_RESOURCE; -import static org.opensearch.sql.spark.validator.GrammarElement.MISC_FUNCTIONS; -import static org.opensearch.sql.spark.validator.GrammarElement.REFRESH_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.REFRESH_RESOURCE; -import static org.opensearch.sql.spark.validator.GrammarElement.RESET; -import static org.opensearch.sql.spark.validator.GrammarElement.RIGHT_OUTER_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.SET; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_FUNCTIONS; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_VIEWS; -import static org.opensearch.sql.spark.validator.GrammarElement.TABLESAMPLE; -import static org.opensearch.sql.spark.validator.GrammarElement.TABLE_VALUED_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.TRANSFORM; -import static org.opensearch.sql.spark.validator.GrammarElement.UDF; - -import com.google.common.collect.ImmutableSet; -import java.util.Set; - -public class S3GlueGrammarElementValidator extends DenyListGrammarElementValidator { - private static final Set S3GLUE_DENY_LIST = - ImmutableSet.builder() - .add( - ALTER_VIEW, - CREATE_FUNCTION, - CREATE_VIEW, - DROP_FUNCTION, - DROP_VIEW, - INSERT, - LOAD, - CLUSTER_BY, - DISTRIBUTE_BY, - HINTS, - INLINE_TABLE, - FILE, - CROSS_JOIN, - LEFT_SEMI_JOIN, - RIGHT_OUTER_JOIN, - FULL_OUTER_JOIN, - LEFT_ANTI_JOIN, - TABLESAMPLE, - TABLE_VALUED_FUNCTION, - TRANSFORM, - MANAGE_RESOURCE, - DESCRIBE_FUNCTION, - REFRESH_RESOURCE, - 
REFRESH_FUNCTION, - RESET, - SET, - SHOW_FUNCTIONS, - SHOW_VIEWS, - BITWISE_FUNCTIONS, - MISC_FUNCTIONS, - UDF) - .build(); - - public S3GlueGrammarElementValidator() { - super(S3GLUE_DENY_LIST); - } -} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueSQLGrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueSQLGrammarElementValidator.java new file mode 100644 index 0000000000..870fb9412d --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueSQLGrammarElementValidator.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.opensearch.sql.spark.validator.SQLGrammarElement.ALTER_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.BITWISE_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CLUSTER_BY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CROSS_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DISTRIBUTE_BY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.FILE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.FULL_OUTER_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.HINTS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.INLINE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.INSERT; +import static 
org.opensearch.sql.spark.validator.SQLGrammarElement.LEFT_ANTI_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LEFT_SEMI_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LOAD; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.MANAGE_RESOURCE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.MISC_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_RESOURCE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.RESET; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.RIGHT_OUTER_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SET; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_VIEWS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TABLESAMPLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TABLE_VALUED_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TRANSFORM; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.UDF; + +import com.google.common.collect.ImmutableSet; +import java.util.Set; + +public class S3GlueSQLGrammarElementValidator extends DenyListGrammarElementValidator { + private static final Set S3GLUE_DENY_LIST = + ImmutableSet.builder() + .add( + ALTER_VIEW, + CREATE_FUNCTION, + CREATE_VIEW, + DROP_FUNCTION, + DROP_VIEW, + INSERT, + LOAD, + CLUSTER_BY, + DISTRIBUTE_BY, + HINTS, + INLINE_TABLE, + FILE, + CROSS_JOIN, + LEFT_SEMI_JOIN, + RIGHT_OUTER_JOIN, + FULL_OUTER_JOIN, + LEFT_ANTI_JOIN, + TABLESAMPLE, + TABLE_VALUED_FUNCTION, + TRANSFORM, + MANAGE_RESOURCE, + DESCRIBE_FUNCTION, + REFRESH_RESOURCE, + REFRESH_FUNCTION, + RESET, + SET, + SHOW_FUNCTIONS, + SHOW_VIEWS, + BITWISE_FUNCTIONS, + 
MISC_FUNCTIONS, + UDF) + .build(); + + public S3GlueSQLGrammarElementValidator() { + super(S3GLUE_DENY_LIST); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLGrammarElement.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLGrammarElement.java new file mode 100644 index 0000000000..ef3e1f2c8c --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLGrammarElement.java @@ -0,0 +1,91 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public enum SQLGrammarElement implements GrammarElement { + ALTER_NAMESPACE("ALTER (DATABASE|TABLE|NAMESPACE)"), + ALTER_VIEW("ALTER VIEW"), + CREATE_NAMESPACE("CREATE (DATABASE|TABLE|NAMESPACE)"), + CREATE_FUNCTION("CREATE FUNCTION"), + CREATE_VIEW("CREATE VIEW"), + DROP_NAMESPACE("DROP (DATABASE|TABLE|NAMESPACE)"), + DROP_FUNCTION("DROP FUNCTION"), + DROP_VIEW("DROP VIEW"), + DROP_TABLE("DROP TABLE"), + REPAIR_TABLE("REPAIR TABLE"), + TRUNCATE_TABLE("TRUNCATE TABLE"), + // DML Statements + INSERT("INSERT"), + LOAD("LOAD"), + + // Data Retrieval Statements + EXPLAIN("EXPLAIN"), + WITH("WITH"), + CLUSTER_BY("CLUSTER BY"), + DISTRIBUTE_BY("DISTRIBUTE BY"), + // GROUP_BY("GROUP BY"), + // HAVING("HAVING"), + HINTS("HINTS"), + INLINE_TABLE("Inline Table(VALUES)"), + FILE("File"), + INNER_JOIN("INNER JOIN"), + CROSS_JOIN("CROSS JOIN"), + LEFT_OUTER_JOIN("LEFT OUTER JOIN"), + LEFT_SEMI_JOIN("LEFT SEMI JOIN"), + RIGHT_OUTER_JOIN("RIGHT OUTER JOIN"), + FULL_OUTER_JOIN("FULL OUTER JOIN"), + LEFT_ANTI_JOIN("LEFT ANTI JOIN"), + TABLESAMPLE("TABLESAMPLE"), + TABLE_VALUED_FUNCTION("Table-valued function"), + LATERAL_VIEW("LATERAL VIEW"), + LATERAL_SUBQUERY("LATERAL SUBQUERY"), + TRANSFORM("TRANSFORM"), + + // Auxiliary Statements + MANAGE_RESOURCE("Resource management statements"), + ANALYZE_TABLE("ANALYZE 
TABLE(S)"), + CACHE_TABLE("CACHE TABLE"), + CLEAR_CACHE("CLEAR CACHE"), + DESCRIBE_NAMESPACE("DESCRIBE (NAMESPACE|DATABASE|SCHEMA)"), + DESCRIBE_FUNCTION("DESCRIBE FUNCTION"), + DESCRIBE_QUERY("DESCRIBE QUERY"), + DESCRIBE_TABLE("DESCRIBE TABLE"), + REFRESH_RESOURCE("REFRESH"), + REFRESH_TABLE("REFRESH TABLE"), + REFRESH_FUNCTION("REFRESH FUNCTION"), + RESET("RESET"), + SET("SET"), + SHOW_COLUMNS("SHOW COLUMNS"), + SHOW_CREATE_TABLE("SHOW CREATE TABLE"), + SHOW_NAMESPACES("SHOW (DATABASES|SCHEMAS)"), + SHOW_FUNCTIONS("SHOW FUNCTIONS"), + SHOW_PARTITIONS("SHOW PARTITIONS"), + SHOW_TABLE_EXTENDED("SHOW TABLE EXTENDED"), + SHOW_TABLES("SHOW TABLES"), + SHOW_TBLPROPERTIES("SHOW TBLPROPERTIES"), + SHOW_VIEWS("SHOW VIEWS"), + UNCACHE_TABLE("UNCACHE TABLE"), + + // Functions + MAP_FUNCTIONS("Map functions"), + BITWISE_FUNCTIONS("Bitwise functions"), + CSV_FUNCTIONS("CSV functions"), + GENERATOR_FUNCTIONS("Generator functions"), + MISC_FUNCTIONS("Misc functions"), + + // UDF + UDF("User Defined functions"); + + String description; + + @Override + public String toString() { + return description; + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidationVisitor.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidationVisitor.java index 2312c0de7a..fc776218d7 100644 --- a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidationVisitor.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidationVisitor.java @@ -87,100 +87,100 @@ public class SQLQueryValidationVisitor extends SqlBaseParserBaseVisitor { @Override public Void visitCreateFunction(SqlBaseParser.CreateFunctionContext ctx) { - validateAllowed(GrammarElement.CREATE_FUNCTION); + validateAllowed(SQLGrammarElement.CREATE_FUNCTION); return super.visitCreateFunction(ctx); } @Override public Void visitSetNamespaceProperties(SetNamespacePropertiesContext ctx) { - 
validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitSetNamespaceProperties(ctx); } @Override public Void visitAddTableColumns(AddTableColumnsContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitAddTableColumns(ctx); } @Override public Void visitAddTablePartition(AddTablePartitionContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitAddTablePartition(ctx); } @Override public Void visitRenameTableColumn(RenameTableColumnContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitRenameTableColumn(ctx); } @Override public Void visitDropTableColumns(DropTableColumnsContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitDropTableColumns(ctx); } @Override public Void visitAlterTableAlterColumn(AlterTableAlterColumnContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitAlterTableAlterColumn(ctx); } @Override public Void visitHiveReplaceColumns(HiveReplaceColumnsContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitHiveReplaceColumns(ctx); } @Override public Void visitSetTableSerDe(SetTableSerDeContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitSetTableSerDe(ctx); } @Override public Void visitRenameTablePartition(RenameTablePartitionContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitRenameTablePartition(ctx); } @Override public Void 
visitDropTablePartitions(DropTablePartitionsContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitDropTablePartitions(ctx); } @Override public Void visitSetTableLocation(SetTableLocationContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitSetTableLocation(ctx); } @Override public Void visitRecoverPartitions(RecoverPartitionsContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitRecoverPartitions(ctx); } @Override public Void visitSetNamespaceLocation(SetNamespaceLocationContext ctx) { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); return super.visitSetNamespaceLocation(ctx); } @Override public Void visitAlterViewQuery(AlterViewQueryContext ctx) { - validateAllowed(GrammarElement.ALTER_VIEW); + validateAllowed(SQLGrammarElement.ALTER_VIEW); return super.visitAlterViewQuery(ctx); } @Override public Void visitRenameTable(RenameTableContext ctx) { if (ctx.VIEW() != null) { - validateAllowed(GrammarElement.ALTER_VIEW); + validateAllowed(SQLGrammarElement.ALTER_VIEW); } else { - validateAllowed(GrammarElement.ALTER_NAMESPACE); + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); } return super.visitRenameTable(ctx); @@ -188,109 +188,109 @@ public Void visitRenameTable(RenameTableContext ctx) { @Override public Void visitCreateNamespace(CreateNamespaceContext ctx) { - validateAllowed(GrammarElement.CREATE_NAMESPACE); + validateAllowed(SQLGrammarElement.CREATE_NAMESPACE); return super.visitCreateNamespace(ctx); } @Override public Void visitCreateTable(CreateTableContext ctx) { - validateAllowed(GrammarElement.CREATE_NAMESPACE); + validateAllowed(SQLGrammarElement.CREATE_NAMESPACE); return super.visitCreateTable(ctx); } @Override public Void 
visitCreateTableLike(CreateTableLikeContext ctx) { - validateAllowed(GrammarElement.CREATE_NAMESPACE); + validateAllowed(SQLGrammarElement.CREATE_NAMESPACE); return super.visitCreateTableLike(ctx); } @Override public Void visitReplaceTable(ReplaceTableContext ctx) { - validateAllowed(GrammarElement.CREATE_NAMESPACE); + validateAllowed(SQLGrammarElement.CREATE_NAMESPACE); return super.visitReplaceTable(ctx); } @Override public Void visitDropNamespace(DropNamespaceContext ctx) { - validateAllowed(GrammarElement.DROP_NAMESPACE); + validateAllowed(SQLGrammarElement.DROP_NAMESPACE); return super.visitDropNamespace(ctx); } @Override public Void visitDropTable(DropTableContext ctx) { - validateAllowed(GrammarElement.DROP_NAMESPACE); + validateAllowed(SQLGrammarElement.DROP_NAMESPACE); return super.visitDropTable(ctx); } @Override public Void visitCreateView(CreateViewContext ctx) { - validateAllowed(GrammarElement.CREATE_VIEW); + validateAllowed(SQLGrammarElement.CREATE_VIEW); return super.visitCreateView(ctx); } @Override public Void visitDropView(DropViewContext ctx) { - validateAllowed(GrammarElement.DROP_VIEW); + validateAllowed(SQLGrammarElement.DROP_VIEW); return super.visitDropView(ctx); } @Override public Void visitDropFunction(DropFunctionContext ctx) { - validateAllowed(GrammarElement.DROP_FUNCTION); + validateAllowed(SQLGrammarElement.DROP_FUNCTION); return super.visitDropFunction(ctx); } @Override public Void visitRepairTable(RepairTableContext ctx) { - validateAllowed(GrammarElement.REPAIR_TABLE); + validateAllowed(SQLGrammarElement.REPAIR_TABLE); return super.visitRepairTable(ctx); } @Override public Void visitTruncateTable(TruncateTableContext ctx) { - validateAllowed(GrammarElement.TRUNCATE_TABLE); + validateAllowed(SQLGrammarElement.TRUNCATE_TABLE); return super.visitTruncateTable(ctx); } @Override public Void visitInsertOverwriteTable(InsertOverwriteTableContext ctx) { - validateAllowed(GrammarElement.INSERT); + validateAllowed(SQLGrammarElement.INSERT); 
return super.visitInsertOverwriteTable(ctx); } @Override public Void visitInsertIntoReplaceWhere(InsertIntoReplaceWhereContext ctx) { - validateAllowed(GrammarElement.INSERT); + validateAllowed(SQLGrammarElement.INSERT); return super.visitInsertIntoReplaceWhere(ctx); } @Override public Void visitInsertIntoTable(InsertIntoTableContext ctx) { - validateAllowed(GrammarElement.INSERT); + validateAllowed(SQLGrammarElement.INSERT); return super.visitInsertIntoTable(ctx); } @Override public Void visitInsertOverwriteDir(InsertOverwriteDirContext ctx) { - validateAllowed(GrammarElement.INSERT); + validateAllowed(SQLGrammarElement.INSERT); return super.visitInsertOverwriteDir(ctx); } @Override public Void visitInsertOverwriteHiveDir(InsertOverwriteHiveDirContext ctx) { - validateAllowed(GrammarElement.INSERT); + validateAllowed(SQLGrammarElement.INSERT); return super.visitInsertOverwriteHiveDir(ctx); } @Override public Void visitLoadData(LoadDataContext ctx) { - validateAllowed(GrammarElement.LOAD); + validateAllowed(SQLGrammarElement.LOAD); return super.visitLoadData(ctx); } @Override public Void visitExplain(ExplainContext ctx) { - validateAllowed(GrammarElement.EXPLAIN); + validateAllowed(SQLGrammarElement.EXPLAIN); return super.visitExplain(ctx); } @@ -298,7 +298,7 @@ public Void visitExplain(ExplainContext ctx) { public Void visitTableName(TableNameContext ctx) { String reference = ctx.identifierReference().getText(); if (isFileReference(reference)) { - validateAllowed(GrammarElement.FILE); + validateAllowed(SQLGrammarElement.FILE); } return super.visitTableName(ctx); } @@ -311,74 +311,74 @@ private boolean isFileReference(String reference) { @Override public Void visitCtes(CtesContext ctx) { - validateAllowed(GrammarElement.WITH); + validateAllowed(SQLGrammarElement.WITH); return super.visitCtes(ctx); } @Override public Void visitQueryOrganization(QueryOrganizationContext ctx) { if (ctx.CLUSTER() != null) { - validateAllowed(GrammarElement.CLUSTER_BY); + 
validateAllowed(SQLGrammarElement.CLUSTER_BY); } else if (ctx.DISTRIBUTE() != null) { - validateAllowed(GrammarElement.DISTRIBUTE_BY); + validateAllowed(SQLGrammarElement.DISTRIBUTE_BY); } return super.visitQueryOrganization(ctx); } @Override public Void visitHint(HintContext ctx) { - validateAllowed(GrammarElement.HINTS); + validateAllowed(SQLGrammarElement.HINTS); return super.visitHint(ctx); } @Override public Void visitInlineTable(InlineTableContext ctx) { - validateAllowed(GrammarElement.INLINE_TABLE); + validateAllowed(SQLGrammarElement.INLINE_TABLE); return super.visitInlineTable(ctx); } @Override public Void visitJoinType(JoinTypeContext ctx) { if (ctx.CROSS() != null) { - validateAllowed(GrammarElement.CROSS_JOIN); + validateAllowed(SQLGrammarElement.CROSS_JOIN); } else if (ctx.LEFT() != null && ctx.SEMI() != null) { - validateAllowed(GrammarElement.LEFT_SEMI_JOIN); + validateAllowed(SQLGrammarElement.LEFT_SEMI_JOIN); } else if (ctx.ANTI() != null) { - validateAllowed(GrammarElement.LEFT_ANTI_JOIN); + validateAllowed(SQLGrammarElement.LEFT_ANTI_JOIN); } else if (ctx.LEFT() != null) { - validateAllowed(GrammarElement.LEFT_OUTER_JOIN); + validateAllowed(SQLGrammarElement.LEFT_OUTER_JOIN); } else if (ctx.RIGHT() != null) { - validateAllowed(GrammarElement.RIGHT_OUTER_JOIN); + validateAllowed(SQLGrammarElement.RIGHT_OUTER_JOIN); } else if (ctx.FULL() != null) { - validateAllowed(GrammarElement.FULL_OUTER_JOIN); + validateAllowed(SQLGrammarElement.FULL_OUTER_JOIN); } else { - validateAllowed(GrammarElement.INNER_JOIN); + validateAllowed(SQLGrammarElement.INNER_JOIN); } return super.visitJoinType(ctx); } @Override public Void visitSample(SampleContext ctx) { - validateAllowed(GrammarElement.TABLESAMPLE); + validateAllowed(SQLGrammarElement.TABLESAMPLE); return super.visitSample(ctx); } @Override public Void visitTableValuedFunction(TableValuedFunctionContext ctx) { - validateAllowed(GrammarElement.TABLE_VALUED_FUNCTION); + 
validateAllowed(SQLGrammarElement.TABLE_VALUED_FUNCTION); return super.visitTableValuedFunction(ctx); } @Override public Void visitLateralView(LateralViewContext ctx) { - validateAllowed(GrammarElement.LATERAL_VIEW); + validateAllowed(SQLGrammarElement.LATERAL_VIEW); return super.visitLateralView(ctx); } @Override public Void visitRelation(RelationContext ctx) { if (ctx.LATERAL() != null) { - validateAllowed(GrammarElement.LATERAL_SUBQUERY); + validateAllowed(SQLGrammarElement.LATERAL_SUBQUERY); } return super.visitRelation(ctx); } @@ -386,7 +386,7 @@ public Void visitRelation(RelationContext ctx) { @Override public Void visitJoinRelation(JoinRelationContext ctx) { if (ctx.LATERAL() != null) { - validateAllowed(GrammarElement.LATERAL_SUBQUERY); + validateAllowed(SQLGrammarElement.LATERAL_SUBQUERY); } return super.visitJoinRelation(ctx); } @@ -394,158 +394,158 @@ public Void visitJoinRelation(JoinRelationContext ctx) { @Override public Void visitTransformClause(TransformClauseContext ctx) { if (ctx.TRANSFORM() != null) { - validateAllowed(GrammarElement.TRANSFORM); + validateAllowed(SQLGrammarElement.TRANSFORM); } return super.visitTransformClause(ctx); } @Override public Void visitManageResource(ManageResourceContext ctx) { - validateAllowed(GrammarElement.MANAGE_RESOURCE); + validateAllowed(SQLGrammarElement.MANAGE_RESOURCE); return super.visitManageResource(ctx); } @Override public Void visitAnalyze(AnalyzeContext ctx) { - validateAllowed(GrammarElement.ANALYZE_TABLE); + validateAllowed(SQLGrammarElement.ANALYZE_TABLE); return super.visitAnalyze(ctx); } @Override public Void visitAnalyzeTables(AnalyzeTablesContext ctx) { - validateAllowed(GrammarElement.ANALYZE_TABLE); + validateAllowed(SQLGrammarElement.ANALYZE_TABLE); return super.visitAnalyzeTables(ctx); } @Override public Void visitCacheTable(CacheTableContext ctx) { - validateAllowed(GrammarElement.CACHE_TABLE); + validateAllowed(SQLGrammarElement.CACHE_TABLE); return super.visitCacheTable(ctx); } @Override 
public Void visitClearCache(ClearCacheContext ctx) { - validateAllowed(GrammarElement.CLEAR_CACHE); + validateAllowed(SQLGrammarElement.CLEAR_CACHE); return super.visitClearCache(ctx); } @Override public Void visitDescribeNamespace(DescribeNamespaceContext ctx) { - validateAllowed(GrammarElement.DESCRIBE_NAMESPACE); + validateAllowed(SQLGrammarElement.DESCRIBE_NAMESPACE); return super.visitDescribeNamespace(ctx); } @Override public Void visitDescribeFunction(DescribeFunctionContext ctx) { - validateAllowed(GrammarElement.DESCRIBE_FUNCTION); + validateAllowed(SQLGrammarElement.DESCRIBE_FUNCTION); return super.visitDescribeFunction(ctx); } @Override public Void visitDescribeRelation(DescribeRelationContext ctx) { - validateAllowed(GrammarElement.DESCRIBE_TABLE); + validateAllowed(SQLGrammarElement.DESCRIBE_TABLE); return super.visitDescribeRelation(ctx); } @Override public Void visitDescribeQuery(DescribeQueryContext ctx) { - validateAllowed(GrammarElement.DESCRIBE_QUERY); + validateAllowed(SQLGrammarElement.DESCRIBE_QUERY); return super.visitDescribeQuery(ctx); } @Override public Void visitRefreshResource(RefreshResourceContext ctx) { - validateAllowed(GrammarElement.REFRESH_RESOURCE); + validateAllowed(SQLGrammarElement.REFRESH_RESOURCE); return super.visitRefreshResource(ctx); } @Override public Void visitRefreshTable(RefreshTableContext ctx) { - validateAllowed(GrammarElement.REFRESH_TABLE); + validateAllowed(SQLGrammarElement.REFRESH_TABLE); return super.visitRefreshTable(ctx); } @Override public Void visitRefreshFunction(RefreshFunctionContext ctx) { - validateAllowed(GrammarElement.REFRESH_FUNCTION); + validateAllowed(SQLGrammarElement.REFRESH_FUNCTION); return super.visitRefreshFunction(ctx); } @Override public Void visitResetConfiguration(ResetConfigurationContext ctx) { - validateAllowed(GrammarElement.RESET); + validateAllowed(SQLGrammarElement.RESET); return super.visitResetConfiguration(ctx); } @Override public Void 
visitResetQuotedConfiguration(ResetQuotedConfigurationContext ctx) { - validateAllowed(GrammarElement.RESET); + validateAllowed(SQLGrammarElement.RESET); return super.visitResetQuotedConfiguration(ctx); } @Override public Void visitSetConfiguration(SetConfigurationContext ctx) { - validateAllowed(GrammarElement.SET); + validateAllowed(SQLGrammarElement.SET); return super.visitSetConfiguration(ctx); } @Override public Void visitShowColumns(ShowColumnsContext ctx) { - validateAllowed(GrammarElement.SHOW_COLUMNS); + validateAllowed(SQLGrammarElement.SHOW_COLUMNS); return super.visitShowColumns(ctx); } @Override public Void visitShowCreateTable(ShowCreateTableContext ctx) { - validateAllowed(GrammarElement.SHOW_CREATE_TABLE); + validateAllowed(SQLGrammarElement.SHOW_CREATE_TABLE); return super.visitShowCreateTable(ctx); } @Override public Void visitShowNamespaces(ShowNamespacesContext ctx) { - validateAllowed(GrammarElement.SHOW_NAMESPACES); + validateAllowed(SQLGrammarElement.SHOW_NAMESPACES); return super.visitShowNamespaces(ctx); } @Override public Void visitShowFunctions(ShowFunctionsContext ctx) { - validateAllowed(GrammarElement.SHOW_FUNCTIONS); + validateAllowed(SQLGrammarElement.SHOW_FUNCTIONS); return super.visitShowFunctions(ctx); } @Override public Void visitShowPartitions(ShowPartitionsContext ctx) { - validateAllowed(GrammarElement.SHOW_PARTITIONS); + validateAllowed(SQLGrammarElement.SHOW_PARTITIONS); return super.visitShowPartitions(ctx); } @Override public Void visitShowTableExtended(ShowTableExtendedContext ctx) { - validateAllowed(GrammarElement.SHOW_TABLE_EXTENDED); + validateAllowed(SQLGrammarElement.SHOW_TABLE_EXTENDED); return super.visitShowTableExtended(ctx); } @Override public Void visitShowTables(ShowTablesContext ctx) { - validateAllowed(GrammarElement.SHOW_TABLES); + validateAllowed(SQLGrammarElement.SHOW_TABLES); return super.visitShowTables(ctx); } @Override public Void visitShowTblProperties(ShowTblPropertiesContext ctx) { - 
validateAllowed(GrammarElement.SHOW_TBLPROPERTIES); + validateAllowed(SQLGrammarElement.SHOW_TBLPROPERTIES); return super.visitShowTblProperties(ctx); } @Override public Void visitShowViews(ShowViewsContext ctx) { - validateAllowed(GrammarElement.SHOW_VIEWS); + validateAllowed(SQLGrammarElement.SHOW_VIEWS); return super.visitShowViews(ctx); } @Override public Void visitUncacheTable(UncacheTableContext ctx) { - validateAllowed(GrammarElement.UNCACHE_TABLE); + validateAllowed(SQLGrammarElement.UNCACHE_TABLE); return super.visitUncacheTable(ctx); } @@ -559,27 +559,27 @@ private void validateFunctionAllowed(String function) { FunctionType type = FunctionType.fromFunctionName(function.toLowerCase()); switch (type) { case MAP: - validateAllowed(GrammarElement.MAP_FUNCTIONS); + validateAllowed(SQLGrammarElement.MAP_FUNCTIONS); break; case BITWISE: - validateAllowed(GrammarElement.BITWISE_FUNCTIONS); + validateAllowed(SQLGrammarElement.BITWISE_FUNCTIONS); break; case CSV: - validateAllowed(GrammarElement.CSV_FUNCTIONS); + validateAllowed(SQLGrammarElement.CSV_FUNCTIONS); break; case MISC: - validateAllowed(GrammarElement.MISC_FUNCTIONS); + validateAllowed(SQLGrammarElement.MISC_FUNCTIONS); break; case GENERATOR: - validateAllowed(GrammarElement.GENERATOR_FUNCTIONS); + validateAllowed(SQLGrammarElement.GENERATOR_FUNCTIONS); break; case UDF: - validateAllowed(GrammarElement.UDF); + validateAllowed(SQLGrammarElement.UDF); break; } } - private void validateAllowed(GrammarElement element) { + private void validateAllowed(SQLGrammarElement element) { if (!grammarElementValidator.isValid(element)) { throw new IllegalArgumentException(element + " is not allowed."); } diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeGrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeGrammarElementValidator.java deleted file mode 100644 index 7dd2b0ee89..0000000000 --- a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeGrammarElementValidator.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * 
Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.validator; - -import static org.opensearch.sql.spark.validator.GrammarElement.ALTER_NAMESPACE; -import static org.opensearch.sql.spark.validator.GrammarElement.ALTER_VIEW; -import static org.opensearch.sql.spark.validator.GrammarElement.ANALYZE_TABLE; -import static org.opensearch.sql.spark.validator.GrammarElement.CACHE_TABLE; -import static org.opensearch.sql.spark.validator.GrammarElement.CLEAR_CACHE; -import static org.opensearch.sql.spark.validator.GrammarElement.CLUSTER_BY; -import static org.opensearch.sql.spark.validator.GrammarElement.CREATE_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.CREATE_NAMESPACE; -import static org.opensearch.sql.spark.validator.GrammarElement.CREATE_VIEW; -import static org.opensearch.sql.spark.validator.GrammarElement.CROSS_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.CSV_FUNCTIONS; -import static org.opensearch.sql.spark.validator.GrammarElement.DESCRIBE_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.DESCRIBE_NAMESPACE; -import static org.opensearch.sql.spark.validator.GrammarElement.DESCRIBE_QUERY; -import static org.opensearch.sql.spark.validator.GrammarElement.DESCRIBE_TABLE; -import static org.opensearch.sql.spark.validator.GrammarElement.DISTRIBUTE_BY; -import static org.opensearch.sql.spark.validator.GrammarElement.DROP_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.DROP_NAMESPACE; -import static org.opensearch.sql.spark.validator.GrammarElement.DROP_VIEW; -import static org.opensearch.sql.spark.validator.GrammarElement.FILE; -import static org.opensearch.sql.spark.validator.GrammarElement.FULL_OUTER_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.HINTS; -import static org.opensearch.sql.spark.validator.GrammarElement.INLINE_TABLE; -import static 
org.opensearch.sql.spark.validator.GrammarElement.INSERT; -import static org.opensearch.sql.spark.validator.GrammarElement.LEFT_ANTI_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.LEFT_SEMI_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.LOAD; -import static org.opensearch.sql.spark.validator.GrammarElement.MANAGE_RESOURCE; -import static org.opensearch.sql.spark.validator.GrammarElement.MISC_FUNCTIONS; -import static org.opensearch.sql.spark.validator.GrammarElement.REFRESH_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.REFRESH_RESOURCE; -import static org.opensearch.sql.spark.validator.GrammarElement.REFRESH_TABLE; -import static org.opensearch.sql.spark.validator.GrammarElement.REPAIR_TABLE; -import static org.opensearch.sql.spark.validator.GrammarElement.RESET; -import static org.opensearch.sql.spark.validator.GrammarElement.RIGHT_OUTER_JOIN; -import static org.opensearch.sql.spark.validator.GrammarElement.SET; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_COLUMNS; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_CREATE_TABLE; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_FUNCTIONS; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_NAMESPACES; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_PARTITIONS; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_TABLES; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_TABLE_EXTENDED; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_TBLPROPERTIES; -import static org.opensearch.sql.spark.validator.GrammarElement.SHOW_VIEWS; -import static org.opensearch.sql.spark.validator.GrammarElement.TABLESAMPLE; -import static org.opensearch.sql.spark.validator.GrammarElement.TABLE_VALUED_FUNCTION; -import static org.opensearch.sql.spark.validator.GrammarElement.TRANSFORM; -import static 
org.opensearch.sql.spark.validator.GrammarElement.TRUNCATE_TABLE; -import static org.opensearch.sql.spark.validator.GrammarElement.UDF; -import static org.opensearch.sql.spark.validator.GrammarElement.UNCACHE_TABLE; - -import com.google.common.collect.ImmutableSet; -import java.util.Set; - -public class SecurityLakeGrammarElementValidator extends DenyListGrammarElementValidator { - private static final Set<GrammarElement> SECURITY_LAKE_DENY_LIST = - ImmutableSet.<GrammarElement>builder() - .add( - ALTER_NAMESPACE, - ALTER_VIEW, - CREATE_NAMESPACE, - CREATE_FUNCTION, - CREATE_VIEW, - DROP_FUNCTION, - DROP_NAMESPACE, - DROP_VIEW, - REPAIR_TABLE, - TRUNCATE_TABLE, - INSERT, - LOAD, - CLUSTER_BY, - DISTRIBUTE_BY, - HINTS, - INLINE_TABLE, - FILE, - CROSS_JOIN, - LEFT_SEMI_JOIN, - RIGHT_OUTER_JOIN, - FULL_OUTER_JOIN, - LEFT_ANTI_JOIN, - TABLESAMPLE, - TABLE_VALUED_FUNCTION, - TRANSFORM, - MANAGE_RESOURCE, - ANALYZE_TABLE, - CACHE_TABLE, - CLEAR_CACHE, - DESCRIBE_NAMESPACE, - DESCRIBE_FUNCTION, - DESCRIBE_QUERY, - DESCRIBE_TABLE, - REFRESH_RESOURCE, - REFRESH_TABLE, - REFRESH_FUNCTION, - RESET, - SET, - SHOW_COLUMNS, - SHOW_CREATE_TABLE, - SHOW_NAMESPACES, - SHOW_FUNCTIONS, - SHOW_PARTITIONS, - SHOW_TABLE_EXTENDED, - SHOW_TABLES, - SHOW_TBLPROPERTIES, - SHOW_VIEWS, - UNCACHE_TABLE, - CSV_FUNCTIONS, - MISC_FUNCTIONS, - UDF) - .build(); - - public SecurityLakeGrammarElementValidator() { - super(SECURITY_LAKE_DENY_LIST); - } -} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeSQLGrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeSQLGrammarElementValidator.java new file mode 100644 index 0000000000..89af6f31a4 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeSQLGrammarElementValidator.java @@ -0,0 +1,123 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static 
org.opensearch.sql.spark.validator.SQLGrammarElement.ALTER_NAMESPACE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.ALTER_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.ANALYZE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CACHE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CLEAR_CACHE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CLUSTER_BY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_NAMESPACE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CROSS_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CSV_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_NAMESPACE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_QUERY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DISTRIBUTE_BY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_NAMESPACE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.FILE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.FULL_OUTER_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.HINTS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.INLINE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.INSERT; +import static 
org.opensearch.sql.spark.validator.SQLGrammarElement.LEFT_ANTI_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LEFT_SEMI_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LOAD; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.MANAGE_RESOURCE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.MISC_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_RESOURCE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REPAIR_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.RESET; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.RIGHT_OUTER_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SET; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_COLUMNS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_CREATE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_NAMESPACES; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_PARTITIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_TABLES; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_TABLE_EXTENDED; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_TBLPROPERTIES; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_VIEWS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TABLESAMPLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TABLE_VALUED_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TRANSFORM; +import static 
org.opensearch.sql.spark.validator.SQLGrammarElement.TRUNCATE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.UDF; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.UNCACHE_TABLE; + +import com.google.common.collect.ImmutableSet; +import java.util.Set; + +public class SecurityLakeSQLGrammarElementValidator extends DenyListGrammarElementValidator { + private static final Set<GrammarElement> SECURITY_LAKE_DENY_LIST = + ImmutableSet.<GrammarElement>builder() + .add( + ALTER_NAMESPACE, + ALTER_VIEW, + CREATE_NAMESPACE, + CREATE_FUNCTION, + CREATE_VIEW, + DROP_FUNCTION, + DROP_NAMESPACE, + DROP_VIEW, + REPAIR_TABLE, + TRUNCATE_TABLE, + INSERT, + LOAD, + CLUSTER_BY, + DISTRIBUTE_BY, + HINTS, + INLINE_TABLE, + FILE, + CROSS_JOIN, + LEFT_SEMI_JOIN, + RIGHT_OUTER_JOIN, + FULL_OUTER_JOIN, + LEFT_ANTI_JOIN, + TABLESAMPLE, + TABLE_VALUED_FUNCTION, + TRANSFORM, + MANAGE_RESOURCE, + ANALYZE_TABLE, + CACHE_TABLE, + CLEAR_CACHE, + DESCRIBE_NAMESPACE, + DESCRIBE_FUNCTION, + DESCRIBE_QUERY, + DESCRIBE_TABLE, + REFRESH_RESOURCE, + REFRESH_TABLE, + REFRESH_FUNCTION, + RESET, + SET, + SHOW_COLUMNS, + SHOW_CREATE_TABLE, + SHOW_NAMESPACES, + SHOW_FUNCTIONS, + SHOW_PARTITIONS, + SHOW_TABLE_EXTENDED, + SHOW_TABLES, + SHOW_TBLPROPERTIES, + SHOW_VIEWS, + UNCACHE_TABLE, + CSV_FUNCTIONS, + MISC_FUNCTIONS, + UDF) + .build(); + + public SecurityLakeSQLGrammarElementValidator() { + super(SECURITY_LAKE_DENY_LIST); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryCoreIntegTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryCoreIntegTest.java index 5ef8343dcc..382b560727 100644 --- a/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryCoreIntegTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryCoreIntegTest.java @@ -88,7 +88,8 @@ import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; import 
org.opensearch.sql.spark.validator.DefaultGrammarElementValidator; import org.opensearch.sql.spark.validator.GrammarElementValidatorProvider; -import org.opensearch.sql.spark.validator.S3GlueGrammarElementValidator; +import org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.S3GlueSQLGrammarElementValidator; import org.opensearch.sql.spark.validator.SQLQueryValidator; /** @@ -184,15 +185,20 @@ public void setUp() { SQLQueryValidator sqlQueryValidator = new SQLQueryValidator( new GrammarElementValidatorProvider( - ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueGrammarElementValidator()), + ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueSQLGrammarElementValidator()), new DefaultGrammarElementValidator())); + PPLQueryValidator pplQueryValidator = + new PPLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(), new DefaultGrammarElementValidator())); SparkQueryDispatcher sparkQueryDispatcher = new SparkQueryDispatcher( dataSourceService, sessionManager, queryHandlerFactory, queryIdProvider, - sqlQueryValidator); + sqlQueryValidator, + pplQueryValidator); asyncQueryExecutorService = new AsyncQueryExecutorServiceImpl( asyncQueryJobMetadataStorageService, diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java index 405fdf511d..61d72773d9 100644 --- a/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java @@ -91,7 +91,8 @@ import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; import org.opensearch.sql.spark.validator.DefaultGrammarElementValidator; import org.opensearch.sql.spark.validator.GrammarElementValidatorProvider; -import org.opensearch.sql.spark.validator.S3GlueGrammarElementValidator; +import 
org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.S3GlueSQLGrammarElementValidator; import org.opensearch.sql.spark.validator.SQLQueryValidator; @ExtendWith(MockitoExtension.class) @@ -120,9 +121,14 @@ public class SparkQueryDispatcherTest { private final SQLQueryValidator sqlQueryValidator = new SQLQueryValidator( new GrammarElementValidatorProvider( - ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueGrammarElementValidator()), + ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueSQLGrammarElementValidator()), new DefaultGrammarElementValidator())); + private final PPLQueryValidator pplQueryValidator = + new PPLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(), new DefaultGrammarElementValidator())); + private DataSourceSparkParameterComposer dataSourceSparkParameterComposer = (datasourceMetadata, sparkSubmitParameters, dispatchQueryRequest, context) -> { sparkSubmitParameters.setConfigItem(FLINT_INDEX_STORE_AUTH_KEY, "basic"); @@ -175,7 +181,8 @@ void setUp() { sessionManager, queryHandlerFactory, queryIdProvider, - sqlQueryValidator); + sqlQueryValidator, + pplQueryValidator); } @Test @@ -584,7 +591,8 @@ void testDispatchAlterToManualRefreshIndexQuery() { sessionManager, queryHandlerFactory, queryIdProvider, - sqlQueryValidator); + sqlQueryValidator, + pplQueryValidator); String query = "ALTER INDEX elb_and_requestUri ON my_glue.default.http_logs WITH" + " (auto_refresh = false)"; @@ -614,7 +622,8 @@ void testDispatchDropIndexQuery() { sessionManager, queryHandlerFactory, queryIdProvider, - sqlQueryValidator); + sqlQueryValidator, + pplQueryValidator); String query = "DROP INDEX elb_and_requestUri ON my_glue.default.http_logs"; DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProviderTest.java 
b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProviderTest.java index 7d4b255356..3b24c3c8ab 100644 --- a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProviderTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProviderTest.java @@ -12,25 +12,26 @@ import org.opensearch.sql.datasource.model.DataSourceType; class GrammarElementValidatorProviderTest { - S3GlueGrammarElementValidator s3GlueGrammarElementValidator = new S3GlueGrammarElementValidator(); - SecurityLakeGrammarElementValidator securityLakeGrammarElementValidator = - new SecurityLakeGrammarElementValidator(); + S3GlueSQLGrammarElementValidator s3GlueSQLGrammarElementValidator = + new S3GlueSQLGrammarElementValidator(); + SecurityLakeSQLGrammarElementValidator securityLakeSQLGrammarElementValidator = + new SecurityLakeSQLGrammarElementValidator(); DefaultGrammarElementValidator defaultGrammarElementValidator = new DefaultGrammarElementValidator(); GrammarElementValidatorProvider grammarElementValidatorProvider = new GrammarElementValidatorProvider( ImmutableMap.of( - DataSourceType.S3GLUE, s3GlueGrammarElementValidator, - DataSourceType.SECURITY_LAKE, securityLakeGrammarElementValidator), + DataSourceType.S3GLUE, s3GlueSQLGrammarElementValidator, + DataSourceType.SECURITY_LAKE, securityLakeSQLGrammarElementValidator), defaultGrammarElementValidator); @Test public void test() { assertEquals( - s3GlueGrammarElementValidator, + s3GlueSQLGrammarElementValidator, grammarElementValidatorProvider.getValidatorForDatasource(DataSourceType.S3GLUE)); assertEquals( - securityLakeGrammarElementValidator, + securityLakeSQLGrammarElementValidator, grammarElementValidatorProvider.getValidatorForDatasource(DataSourceType.SECURITY_LAKE)); assertEquals( defaultGrammarElementValidator, diff --git 
a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/PPLQueryValidatorTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/PPLQueryValidatorTest.java new file mode 100644 index 0000000000..54df2874b2 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/PPLQueryValidatorTest.java @@ -0,0 +1,167 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; + +import java.util.Arrays; +import lombok.AllArgsConstructor; +import lombok.Getter; +import org.antlr.v4.runtime.CommonTokenStream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SingleStatementContext; + +@ExtendWith(MockitoExtension.class) +public class PPLQueryValidatorTest { + @Mock GrammarElementValidatorProvider mockedProvider; + + @InjectMocks PPLQueryValidator pplQueryValidator; + + private static final String SOURCE_PREFIX = "source = t | "; + + private enum TestElement { + FIELDS("fields field1, field1"), + WHERE("where field1=\"success\""), + STATS("stats count(), count(`field1`), min(`field1`), max(`field1`)"), + PARSE("parse `field1` \".*/(?<file>[^/]+$)\""), + PATTERNS("patterns new_field='no_numbers' pattern='[0-9]' message"), + SORT("sort -field1Alias"), + EVAL("eval field2 = `field` * 2"), + RENAME("rename field2 as field1"), + HEAD("head 10"), + GROK("grok email '.+@%{HOSTNAME:host)'"), + TOP("top 2 Field1 by Field2"), + DEDUP("dedup field1"), + JOIN("join on c_custkey = 
o_custkey orders"), + LOOKUP("lookup account_list mkt_id AS mkt_code REPLACE amount, account_name AS name"), + SUBQUERY("where a > [ source = inner | stats min(c) ]"), + RARE("rare Field1 by Field2"), + TRENDLINE("trendline sma(2, field1) as Field1Alias"), + EVENTSTATS("eventstats sum(field1) by field2"), + FLATTEN("flatten field1"), + FIELD_SUMMARY("fieldsummary includefields=field1 nulls=true"), + FILLNULL("fillnull with 0 in field1"), + EXPAND("expand employee"), + DESCRIBE(false, "describe schema.table"), + STRING_FUNCTIONS("eval cl1Len = LENGTH(col1)"), + DATETIME_FUNCTIONS("eval newDate = ADDDATE(DATE('2020-08-26'), 1)"), + CONDITION_FUNCTIONS("eval field2 = isnull(col1)"), + MATH_FUNCTIONS("eval field2 = ACOS(col1)"), + EXPRESSIONS("where age > (25 + 5)"), + IPADDRESS_FUNCTIONS("where cidrmatch(ip, '192.168.0.1/24')"), + JSON_FUNCTIONS("where cidrmatch(ip, '192.168.0.1/24')"), + LAMBDA_FUNCTIONS("eval array = json_array(1, -1, 2), result = filter(array, x -> x > 0)"), + CRYPTO_FUNCTIONS("eval field1 = MD5('hello')"); + + @Getter private final String[] queries; + + TestElement(String... queries) { + this.queries = addPrefix(queries); + } + + // For describe + TestElement(boolean addPrefix, String... queries) { + this.queries = addPrefix ? addPrefix(queries) : queries; + } + + private String[] addPrefix(String... 
queries) { + return Arrays.stream(queries).map(query -> SOURCE_PREFIX + query).toArray(String[]::new); + } + } + + @Test + void testAllowAllByDefault() { + when(mockedProvider.getValidatorForDatasource(any())) + .thenReturn(new DefaultGrammarElementValidator()); + VerifyValidator v = new VerifyValidator(pplQueryValidator, DataSourceType.SPARK); + Arrays.stream(PPLQueryValidatorTest.TestElement.values()).forEach(v::ok); + } + + @Test + void testCwlValidator() { + when(mockedProvider.getValidatorForDatasource(any())) + .thenReturn(new CWLPPLGrammarElementValidator()); + VerifyValidator v = new VerifyValidator(pplQueryValidator, DataSourceType.SPARK); + + v.ok(TestElement.FIELDS); + v.ok(TestElement.WHERE); + v.ok(TestElement.STATS); + v.ok(TestElement.PARSE); + v.ng(TestElement.PATTERNS); + v.ok(TestElement.SORT); + v.ok(TestElement.EVAL); + v.ok(TestElement.RENAME); + v.ok(TestElement.HEAD); + v.ok(TestElement.GROK); + v.ok(TestElement.TOP); + v.ok(TestElement.DEDUP); + v.ng(TestElement.JOIN); + v.ng(TestElement.LOOKUP); + v.ng(TestElement.SUBQUERY); + v.ok(TestElement.RARE); + v.ok(TestElement.TRENDLINE); + v.ok(TestElement.EVENTSTATS); + v.ng(TestElement.FLATTEN); + v.ok(TestElement.FIELD_SUMMARY); + v.ng(TestElement.FILLNULL); + v.ng(TestElement.EXPAND); + v.ng(TestElement.DESCRIBE); + v.ok(TestElement.STRING_FUNCTIONS); + v.ok(TestElement.DATETIME_FUNCTIONS); + v.ok(TestElement.CONDITION_FUNCTIONS); + v.ok(TestElement.MATH_FUNCTIONS); + v.ok(TestElement.EXPRESSIONS); + v.ng(TestElement.IPADDRESS_FUNCTIONS); + v.ng(TestElement.JSON_FUNCTIONS); + v.ng(TestElement.LAMBDA_FUNCTIONS); + v.ok(TestElement.CRYPTO_FUNCTIONS); + } + + @AllArgsConstructor + private static class VerifyValidator { + private final PPLQueryValidator validator; + private final DataSourceType dataSourceType; + + public void ok(PPLQueryValidatorTest.TestElement query) { + runValidate(query.getQueries()); + } + + public void ng(PPLQueryValidatorTest.TestElement element) { + 
Arrays.stream(element.queries) + .forEach( + query -> + assertThrows( + IllegalArgumentException.class, + () -> runValidate(query), + "The query should throw: query=`" + query.toString() + "`")); + } + + void runValidate(String[] queries) { + Arrays.stream(queries).forEach(query -> validator.validate(query, dataSourceType)); + } + + void runValidate(String query) { + validator.validate(query, dataSourceType); + } + + SingleStatementContext getParser(String query) { + org.opensearch.sql.spark.antlr.parser.SqlBaseParser sqlBaseParser = + new org.opensearch.sql.spark.antlr.parser.SqlBaseParser( + new CommonTokenStream( + new org.opensearch.sql.spark.antlr.parser.SqlBaseLexer( + new CaseInsensitiveCharStream(query)))); + return sqlBaseParser.singleStatement(); + } + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/SQLQueryValidatorTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/SQLQueryValidatorTest.java index 520fc96ba8..3e4eef52fd 100644 --- a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/SQLQueryValidatorTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/SQLQueryValidatorTest.java @@ -332,7 +332,7 @@ void testDenyAllValidator() { @Test void testS3glueQueries() { when(mockedProvider.getValidatorForDatasource(any())) - .thenReturn(new S3GlueGrammarElementValidator()); + .thenReturn(new S3GlueSQLGrammarElementValidator()); VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.S3GLUE); // DDL Statements @@ -449,7 +449,7 @@ void testS3glueQueries() { @Test void testSecurityLakeQueries() { when(mockedProvider.getValidatorForDatasource(any())) - .thenReturn(new SecurityLakeGrammarElementValidator()); + .thenReturn(new SecurityLakeSQLGrammarElementValidator()); VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.SECURITY_LAKE); // DDL Statements diff --git 
a/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java index db070182a3..a577f32e3b 100644 --- a/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java @@ -65,11 +65,13 @@ import org.opensearch.sql.spark.response.OpenSearchJobExecutionResponseReader; import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; import org.opensearch.sql.spark.scheduler.OpenSearchAsyncQueryScheduler; +import org.opensearch.sql.spark.validator.CWLPPLGrammarElementValidator; import org.opensearch.sql.spark.validator.DefaultGrammarElementValidator; import org.opensearch.sql.spark.validator.GrammarElementValidatorProvider; -import org.opensearch.sql.spark.validator.S3GlueGrammarElementValidator; +import org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.S3GlueSQLGrammarElementValidator; import org.opensearch.sql.spark.validator.SQLQueryValidator; -import org.opensearch.sql.spark.validator.SecurityLakeGrammarElementValidator; +import org.opensearch.sql.spark.validator.SecurityLakeSQLGrammarElementValidator; @RequiredArgsConstructor public class AsyncExecutorServiceModule extends AbstractModule { @@ -108,9 +110,15 @@ public SparkQueryDispatcher sparkQueryDispatcher( SessionManager sessionManager, QueryHandlerFactory queryHandlerFactory, QueryIdProvider queryIdProvider, - SQLQueryValidator sqlQueryValidator) { + SQLQueryValidator sqlQueryValidator, + PPLQueryValidator pplQueryValidator) { return new SparkQueryDispatcher( - dataSourceService, sessionManager, queryHandlerFactory, queryIdProvider, sqlQueryValidator); + dataSourceService, + sessionManager, + queryHandlerFactory, + queryIdProvider, + sqlQueryValidator, + pplQueryValidator); } @Provides 
@@ -187,13 +195,22 @@ public SQLQueryValidator sqlQueryValidator() { new GrammarElementValidatorProvider( ImmutableMap.of( DataSourceType.S3GLUE, - new S3GlueGrammarElementValidator(), + new S3GlueSQLGrammarElementValidator(), DataSourceType.SECURITY_LAKE, - new SecurityLakeGrammarElementValidator()), + new SecurityLakeSQLGrammarElementValidator()), new DefaultGrammarElementValidator()); return new SQLQueryValidator(validatorProvider); } + @Provides + public PPLQueryValidator pplQueryValidator() { + GrammarElementValidatorProvider validatorProvider = + new GrammarElementValidatorProvider( + ImmutableMap.of(DataSourceType.S3GLUE, new CWLPPLGrammarElementValidator()), + new DefaultGrammarElementValidator()); + return new PPLQueryValidator(validatorProvider); + } + @Provides public IndexDMLResultStorageService indexDMLResultStorageService( DataSourceService dataSourceService, StateStore stateStore) { diff --git a/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java index 72ed17f5aa..53b465aa6d 100644 --- a/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java @@ -104,7 +104,8 @@ import org.opensearch.sql.spark.scheduler.OpenSearchAsyncQueryScheduler; import org.opensearch.sql.spark.validator.DefaultGrammarElementValidator; import org.opensearch.sql.spark.validator.GrammarElementValidatorProvider; -import org.opensearch.sql.spark.validator.S3GlueGrammarElementValidator; +import org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.S3GlueSQLGrammarElementValidator; import org.opensearch.sql.spark.validator.SQLQueryValidator; import org.opensearch.sql.storage.DataSourceFactory; import org.opensearch.test.OpenSearchIntegTestCase; @@ -315,8 
+316,12 @@ protected AsyncQueryExecutorService createAsyncQueryExecutorService( SQLQueryValidator sqlQueryValidator = new SQLQueryValidator( new GrammarElementValidatorProvider( - ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueGrammarElementValidator()), + ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueSQLGrammarElementValidator()), new DefaultGrammarElementValidator())); + PPLQueryValidator pplQueryValidator = + new PPLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(), new DefaultGrammarElementValidator())); SparkQueryDispatcher sparkQueryDispatcher = new SparkQueryDispatcher( this.dataSourceService, @@ -328,7 +333,8 @@ protected AsyncQueryExecutorService createAsyncQueryExecutorService( sessionIdProvider), queryHandlerFactory, new DatasourceEmbeddedQueryIdProvider(), - sqlQueryValidator); + sqlQueryValidator, + pplQueryValidator); return new AsyncQueryExecutorServiceImpl( asyncQueryJobMetadataStorageService, sparkQueryDispatcher,