diff --git a/docs/category.json b/docs/category.json index 130f2dfe4b..595121a7d4 100644 --- a/docs/category.json +++ b/docs/category.json @@ -16,6 +16,7 @@ ], "sql_cli": [ "user/dql/expressions.rst", + "user/general/identifiers.rst", "user/dql/functions.rst", "user/beyond/partiql.rst" ] diff --git a/docs/user/dql/expressions.rst b/docs/user/dql/expressions.rst index ea5d9160ba..dca1c6463b 100644 --- a/docs/user/dql/expressions.rst +++ b/docs/user/dql/expressions.rst @@ -14,7 +14,7 @@ Introduction Expressions, particularly value expressions, are those which return a scalar value. Expressions have different types and forms. For example, there are literal values as atom expression and arithmetic, predicate and function expression built on top of them. And also expressions can be used in different clauses, such as using arithmetic expression in ``SELECT``, ``WHERE`` or ``HAVING`` clause. -Note that before you try out examples using the SQL features in this doc, you need to enable the new query engine by following the steps in ``opendistro.sql.engine.new.enabled`` section in `Plugin Settings `_. +Note that before you try out examples using the SQL features in this doc, you need to enable the new query engine by following the steps in ``opendistro.sql.engine.new.enabled`` section in `Plugin Settings <../admin/settings.rst>`_. Literal Values ============== diff --git a/docs/user/general/identifiers.rst b/docs/user/general/identifiers.rst new file mode 100644 index 0000000000..16b6a105c7 --- /dev/null +++ b/docs/user/general/identifiers.rst @@ -0,0 +1,108 @@ +=========== +Identifiers +=========== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Introduction +============ + +Identifiers are used for naming your database objects, such as index name, field name, alias etc. Basically there are two types of identifiers: regular identifiers and delimited identifiers. 
+ + +Regular Identifiers +=================== + +Description +----------- + +According to the ANSI SQL standard, a regular identifier is a string of characters that must start with an ASCII letter (lower or upper case). Subsequent characters can be any combination of letters, digits and underscores (``_``). It cannot be a reserved keyword, and whitespace and other special characters are not allowed. Additionally, our SQL parser extends this rule for Elasticsearch storage as shown in the next sub-section. + +Extensions +---------- + +For Elasticsearch, the following identifiers are additionally supported by our SQL parser for convenience (without needing to be delimited as shown in the next section): + +1. Identifiers prefixed by dot ``.``: this is called a hidden index in Elasticsearch, for example ``.kibana``. +2. Identifiers prefixed by at sign ``@``: this is common for meta fields generated by Logstash ingestion. +3. Identifiers with ``-`` in the middle: this is mostly the case for index names with date information. +4. Identifiers with star ``*`` present: this is mostly an index pattern for wildcard match. 
+ +Examples +-------- + +Here are examples for using index pattern directly without quotes:: + + od> SELECT * FROM *cc*nt*; + fetched rows / total rows = 4/4 + +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | + +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + + +Delimited Identifiers +===================== + +Description +----------- + +A delimited identifier is an identifier enclosed in back ticks ````` or double quotation marks ``"``. In this case, the identifier enclosed is not necessarily a regular identifier. In other words, it can contain any special character not allowed by regular identifier. + +Please note the difference between single quote and double quotes in SQL syntax. Single quote is used to enclose a string literal while double quotes have same purpose as back ticks to escape special characters in an identifier. + +Use Cases +--------- + +Here are typical examples of the use of delimited identifiers: + +1. Identifiers of reserved key word name +2. 
Identifiers with dot ``.`` present: similar to ``-``, a dot is commonly used in an index name to include date information, but such a name must be quoted so that the parser can differentiate it from an identifier with qualifiers. +3. Identifiers with other special characters: Elasticsearch has its own rules which allow more special characters, for example Unicode characters are supported in index names. + +Examples +-------- + +Here are examples of quoting an index name with back ticks:: + + od> SELECT * FROM `accounts`; + fetched rows / total rows = 4/4 + +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | + +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + + +Case Sensitivity +================ + +Description +----------- + +In SQL-92, regular identifiers are case insensitive and converted to upper case automatically, just like keywords, while characters in a delimited identifier appear as they are. However, in our SQL implementation, identifiers are treated in a case-sensitive manner, so an identifier must be exactly the same as what is stored in Elasticsearch, which is different from the ANSI standard. 
+ +Examples +-------- + +For example, if you run ``SELECT * FROM ACCOUNTS``, it will end up with an index not found exception from our plugin because the actual index name is under lower case. + + +Identifier Qualifiers +===================== + +For now, we do not support using Elasticsearch cluster name as catalog name to qualify an index name, such as ``my-cluster.logs``. + +TODO: field name qualifiers diff --git a/docs/user/index.rst b/docs/user/index.rst index c3562c305c..f20e9922f2 100644 --- a/docs/user/index.rst +++ b/docs/user/index.rst @@ -17,6 +17,10 @@ Open Distro for Elasticsearch SQL enables you to extract insights out of Elastic - `Plugin Settings `_ +* **Language Structure** + + - `Identifiers `_ + * **Data Query Language** - `Expressions `_ diff --git a/integ-test/build.gradle b/integ-test/build.gradle index 3719344ff0..73422f4435 100644 --- a/integ-test/build.gradle +++ b/integ-test/build.gradle @@ -56,12 +56,13 @@ compileTestJava { } } -tasks.integTest.dependsOn(':plugin:bundlePlugin') +tasks.integTest.dependsOn(':plugin:bundlePlugin', ':integ-test:integTestWithNewEngine') testClusters.integTest { testDistribution = 'oss' plugin file(tasks.getByPath(':plugin:bundlePlugin').archiveFile) } +// Run only legacy SQL ITs with new SQL engine disabled integTest.runner { systemProperty 'tests.security.manager', 'false' systemProperty('project.root', project.projectDir.absolutePath) @@ -78,12 +79,44 @@ integTest.runner { jvmArgs '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' } - include 'com/amazon/opendistroforelasticsearch/sql/ppl/**/*IT.class' - include 'com/amazon/opendistroforelasticsearch/sql/legacy/**/*IT.class' + exclude 'com/amazon/opendistroforelasticsearch/sql/ppl/**/*IT.class' + exclude 'com/amazon/opendistroforelasticsearch/sql/sql/**/*IT.class' exclude 'com/amazon/opendistroforelasticsearch/sql/doctest/**/*IT.class' exclude 'com/amazon/opendistroforelasticsearch/sql/correctness/**' } +// Run PPL ITs and new, legacy and 
comparison SQL ITs with new SQL engine enabled +task integTestWithNewEngine(type: RestIntegTestTask) { + dependsOn ':plugin:bundlePlugin' + runner { + systemProperty 'tests.security.manager', 'false' + systemProperty('project.root', project.projectDir.absolutePath) + + systemProperty "https", System.getProperty("https") + systemProperty "user", System.getProperty("user") + systemProperty "password", System.getProperty("password") + + // Enable new SQL engine + systemProperty 'enableNewEngine', 'true' + + // Tell the test JVM if the cluster JVM is running under a debugger so that tests can use longer timeouts for + // requests. The 'doFirst' delays reading the debug setting on the cluster till execution time. + doFirst { systemProperty 'cluster.debug', getDebug() } + + if (System.getProperty("test.debug") != null) { + jvmArgs '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' + } + + exclude 'com/amazon/opendistroforelasticsearch/sql/doctest/**/*IT.class' + exclude 'com/amazon/opendistroforelasticsearch/sql/correctness/**' + } +} + +testClusters.integTestWithNewEngine { + testDistribution = 'oss' + plugin file(tasks.getByPath(':plugin:bundlePlugin').archiveFile) +} + task docTest(type: RestIntegTestTask) { dependsOn ':plugin:bundlePlugin' diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java index e443799acb..aac3497165 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java @@ -82,6 +82,7 @@ public void setUpIndices() throws Exception { } increaseScriptMaxCompilationsRate(); + enableNewQueryEngine(); init(); } @@ -149,6 +150,13 @@ private void increaseScriptMaxCompilationsRate() throws IOException { new ClusterSetting("transient", 
"script.max_compilations_rate", "10000/1m")); } + private void enableNewQueryEngine() throws IOException { + boolean isEnabled = Boolean.parseBoolean(System.getProperty("enableNewEngine", "false")); + if (isEnabled) { + com.amazon.opendistroforelasticsearch.sql.util.TestUtils.enableNewQueryEngine(client()); + } + } + protected static void wipeAllClusterSettings() throws IOException { updateClusterSettings(new ClusterSetting("persistent", "*", null)); updateClusterSettings(new ClusterSetting("transient", "*", null)); diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/SQLIntegTestCase.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/CorrectnessTestBase.java similarity index 98% rename from integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/SQLIntegTestCase.java rename to integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/CorrectnessTestBase.java index faef8f713f..d31f297728 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/SQLIntegTestCase.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/CorrectnessTestBase.java @@ -39,7 +39,7 @@ * enforce the success of all tests rather than report failures only. */ @ThreadLeakScope(ThreadLeakScope.Scope.NONE) -public abstract class SQLIntegTestCase extends RestIntegTestCase { +public abstract class CorrectnessTestBase extends RestIntegTestCase { /** * Comparison test runner shared by all methods in this IT class. diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/IdentifierIT.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/IdentifierIT.java new file mode 100644 index 0000000000..0ed107b242 --- /dev/null +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/IdentifierIT.java @@ -0,0 +1,98 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * + */ + +package com.amazon.opendistroforelasticsearch.sql.sql; + +import static com.amazon.opendistroforelasticsearch.sql.util.TestUtils.createHiddenIndexByRestClient; +import static com.amazon.opendistroforelasticsearch.sql.util.TestUtils.performRequest; + +import com.amazon.opendistroforelasticsearch.sql.legacy.SQLIntegTestCase; +import java.io.IOException; +import org.elasticsearch.client.Request; +import org.junit.jupiter.api.Test; + +/** + * Integration tests for identifiers including index and field name symbol. + */ +public class IdentifierIT extends SQLIntegTestCase { + + @Test + public void testIndexNames() throws IOException { + createIndexWithOneDoc("logs", "logs_2020_01"); + queryAndAssertTheDoc("SELECT * FROM logs"); + queryAndAssertTheDoc("SELECT * FROM logs_2020_01"); + } + + @Test + public void testSpecialIndexNames() throws IOException { + createIndexWithOneDoc(".system", "logs-2020-01"); + queryAndAssertTheDoc("SELECT * FROM .system"); + queryAndAssertTheDoc("SELECT * FROM logs-2020-01"); + } + + @Test + public void testQuotedIndexNames() throws IOException { + createIndexWithOneDoc("logs+2020+01", "logs.2020.01"); + queryAndAssertTheDoc("SELECT * FROM `logs+2020+01`"); + queryAndAssertTheDoc("SELECT * FROM \"logs.2020.01\""); + } + + private void createIndexWithOneDoc(String... 
indexNames) throws IOException { + for (String indexName : indexNames) { + new Index(indexName).addDoc("{\"age\": 30}"); + } + } + + private void queryAndAssertTheDoc(String sql) { + assertEquals( + "{\n" + + " \"schema\": [{\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " }],\n" + + " \"total\": 1,\n" + + " \"datarows\": [[30]],\n" + + " \"size\": 1\n" + + "}\n", + executeQuery(sql.replace("\"", "\\\""), "jdbc") + ); + } + + /** + * Index abstraction for test code readability. + */ + private static class Index { + + private final String indexName; + + Index(String indexName) throws IOException { + this.indexName = indexName; + + if (indexName.startsWith(".")) { + createHiddenIndexByRestClient(client(), indexName, ""); + } else { + executeRequest(new Request("PUT", "/" + indexName)); + } + } + + void addDoc(String doc) { + Request indexDoc = new Request("POST", String.format("/%s/_doc?refresh=true", indexName)); + indexDoc.setJsonEntity(doc); + performRequest(client(), indexDoc); + } + } + +} diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/SQLCorrectnessIT.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/SQLCorrectnessIT.java index 7ad193b985..58c10073a1 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/SQLCorrectnessIT.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/SQLCorrectnessIT.java @@ -28,7 +28,7 @@ /** * SQL integration test automated by comparison test framework. 
*/ -public class SQLCorrectnessIT extends SQLIntegTestCase { +public class SQLCorrectnessIT extends CorrectnessTestBase { private static final String ROOT_DIR = "correctness/"; private static final String[] EXPR_TEST_DIR = { "expressions" }; diff --git a/integ-test/src/test/resources/correctness/queries/select.txt b/integ-test/src/test/resources/correctness/queries/select.txt index 18d34e7ee9..adb7f40782 100644 --- a/integ-test/src/test/resources/correctness/queries/select.txt +++ b/integ-test/src/test/resources/correctness/queries/select.txt @@ -1,2 +1,4 @@ +SELECT 1 + 2 FROM kibana_sample_data_flights +SELECT abs(-10) FROM kibana_sample_data_flights SELECT DistanceMiles FROM kibana_sample_data_flights SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE AvgTicketPrice <= 500 diff --git a/legacy/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/plugin/RestSQLQueryActionTest.java b/legacy/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/plugin/RestSQLQueryActionTest.java index cf3f4b6ff7..9ed790bfcc 100644 --- a/legacy/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/plugin/RestSQLQueryActionTest.java +++ b/legacy/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/plugin/RestSQLQueryActionTest.java @@ -67,8 +67,8 @@ public void skipExplainThatNotSupport() { @Test public void skipQueryThatNotSupport() { SQLQueryRequest request = new SQLQueryRequest( - new JSONObject("{\"query\": \"SELECT * FROM test\"}"), - "SELECT * FROM test", + new JSONObject("{\"query\": \"SELECT * FROM test WHERE age = 10\"}"), + "SELECT * FROM test WHERE age = 10", QUERY_API_ENDPOINT, ""); diff --git a/sql/src/main/antlr/OpenDistroSQLIdentifierParser.g4 b/sql/src/main/antlr/OpenDistroSQLIdentifierParser.g4 new file mode 100644 index 0000000000..f29895e522 --- /dev/null +++ b/sql/src/main/antlr/OpenDistroSQLIdentifierParser.g4 @@ -0,0 +1,45 @@ +/* +MySQL (Positive Technologies) grammar +The MIT License (MIT). 
+Copyright (c) 2015-2017, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. +Copyright (c) 2017, Ivan Khudyashev (IHudyashov@ptsecurity.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +parser grammar OpenDistroSQLIdentifierParser; + +options { tokenVocab=OpenDistroSQLLexer; } + + +// Identifiers + +tableName + : qualifiedName + ; + +qualifiedName + : ident (DOT ident)* + ; + +ident + : DOT? 
ID + | DOUBLE_QUOTE_ID + | BACKTICK_QUOTE_ID + ; diff --git a/sql/src/main/antlr/OpenDistroSQLLexer.g4 b/sql/src/main/antlr/OpenDistroSQLLexer.g4 index 5a511d2aa6..66e3d1bade 100644 --- a/sql/src/main/antlr/OpenDistroSQLLexer.g4 +++ b/sql/src/main/antlr/OpenDistroSQLLexer.g4 @@ -295,7 +295,7 @@ COLON_SYMB: ':'; // Literal Primitives START_NATIONAL_STRING_LITERAL: 'N' SQUOTA_STRING; -STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; +STRING_LITERAL: SQUOTA_STRING; DECIMAL_LITERAL: DEC_DIGIT+; HEXADECIMAL_LITERAL: 'X' '\'' (HEX_DIGIT HEX_DIGIT)+ '\'' | '0X' HEX_DIGIT+; @@ -309,33 +309,17 @@ BIT_STRING: BIT_STRING_L; -// Hack for dotID -// Prevent recognize string: .123somelatin AS ((.123), FLOAT_LITERAL), ((somelatin), ID) -// it must recoginze: .123somelatin AS ((.), DOT), (123somelatin, ID) - -DOT_ID: '.' ID_LITERAL; - - - // Identifiers ID: ID_LITERAL; -// DOUBLE_QUOTE_ID: '"' ~'"'+ '"'; -REVERSE_QUOTE_ID: '`' ~'`'+ '`'; -STRING_USER_NAME: ( - SQUOTA_STRING | DQUOTA_STRING - | BQUOTA_STRING | ID_LITERAL - ) '@' - ( - SQUOTA_STRING | DQUOTA_STRING - | BQUOTA_STRING | ID_LITERAL - ); +DOUBLE_QUOTE_ID: DQUOTA_STRING; +BACKTICK_QUOTE_ID: BQUOTA_STRING; // Fragments for Literal primitives fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+; -fragment ID_LITERAL: [A-Z_$0-9@]*?[A-Z_$]+?[A-Z_$\-0-9]*; +fragment ID_LITERAL: [*A-Z]+?[*A-Z_\-0-9]*; fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; diff --git a/sql/src/main/antlr/OpenDistroSQLParser.g4 b/sql/src/main/antlr/OpenDistroSQLParser.g4 index 264576ee9d..a6517c5572 100644 --- a/sql/src/main/antlr/OpenDistroSQLParser.g4 +++ b/sql/src/main/antlr/OpenDistroSQLParser.g4 @@ -25,6 +25,8 @@ THE SOFTWARE. 
parser grammar OpenDistroSQLParser; +import OpenDistroSQLIdentifierParser; + options { tokenVocab=OpenDistroSQLLexer; } @@ -57,17 +59,26 @@ selectStatement // Select Statement's Details querySpecification + : selectClause + fromClause? + ; + +selectClause : SELECT selectElements ; selectElements - : selectElement (COMMA selectElement)* + : (star=STAR | selectElement) (',' selectElement)* ; selectElement : expression #selectExpressionElement ; +fromClause + : FROM tableName + ; + // Literals @@ -89,14 +100,7 @@ decimalLiteral ; stringLiteral - : ( - STRING_LITERAL - | START_NATIONAL_STRING_LITERAL - ) STRING_LITERAL+ - | ( - STRING_LITERAL - | START_NATIONAL_STRING_LITERAL - ) + : STRING_LITERAL ; booleanLiteral diff --git a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilder.java index 40f42293d2..0774bf769f 100644 --- a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilder.java @@ -16,12 +16,17 @@ package com.amazon.opendistroforelasticsearch.sql.sql.parser; +import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.FromClauseContext; +import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectClauseContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SimpleSelectContext; import com.amazon.opendistroforelasticsearch.sql.ast.expression.UnresolvedExpression; import com.amazon.opendistroforelasticsearch.sql.ast.tree.Project; +import com.amazon.opendistroforelasticsearch.sql.ast.tree.Relation; import com.amazon.opendistroforelasticsearch.sql.ast.tree.UnresolvedPlan; import com.amazon.opendistroforelasticsearch.sql.ast.tree.Values; +import 
com.amazon.opendistroforelasticsearch.sql.common.antlr.SyntaxCheckException; +import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.QuerySpecificationContext; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor; import com.google.common.collect.ImmutableList; import java.util.Collections; @@ -35,21 +40,47 @@ */ public class AstBuilder extends OpenDistroSQLParserBaseVisitor { + private static final Project SELECT_ALL = null; + private final AstExpressionBuilder expressionBuilder = new AstExpressionBuilder(); @Override public UnresolvedPlan visitSimpleSelect(SimpleSelectContext ctx) { - List selectElements = ctx.querySpecification().selectElements().children; - Project project = new Project(selectElements.stream() - .map(this::visitAstExpression) - .filter(Objects::nonNull) - .collect(Collectors.toList())); - - // Attach an Values operator with only a empty row inside so that - // Project operator can have a chance to evaluate its expression - // though the evaluation doesn't have any dependency on what's in Values. - Values emptyValue = new Values(ImmutableList.of(Collections.emptyList())); - return project.attach(emptyValue); + QuerySpecificationContext query = ctx.querySpecification(); + UnresolvedPlan project = visit(query.selectClause()); + + if (query.fromClause() == null) { + if (project == SELECT_ALL) { + throw new SyntaxCheckException("No FROM clause found for select all"); + } + + // Attach an Values operator with only a empty row inside so that + // Project operator can have a chance to evaluate its expression + // though the evaluation doesn't have any dependency on what's in Values. + Values emptyValue = new Values(ImmutableList.of(Collections.emptyList())); + return project.attach(emptyValue); + } + + UnresolvedPlan relation = visit(query.fromClause()); + return (project == SELECT_ALL) ? 
relation : project.attach(relation); + } + + @Override + public UnresolvedPlan visitSelectClause(SelectClauseContext ctx) { + if (ctx.selectElements().star != null) { //TODO: project operator should be required? + return SELECT_ALL; + } + + List selectElements = ctx.selectElements().children; + return new Project(selectElements.stream() + .map(this::visitAstExpression) + .filter(Objects::nonNull) + .collect(Collectors.toList())); + } + + @Override + public UnresolvedPlan visitFromClause(FromClauseContext ctx) { + return new Relation(visitAstExpression(ctx.tableName().qualifiedName())); } @Override diff --git a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilder.java index 606813eb41..b3fab91c47 100644 --- a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilder.java @@ -26,18 +26,43 @@ import com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL; import com.amazon.opendistroforelasticsearch.sql.ast.expression.Function; +import com.amazon.opendistroforelasticsearch.sql.ast.expression.QualifiedName; import com.amazon.opendistroforelasticsearch.sql.ast.expression.UnresolvedExpression; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser; +import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.IdentContext; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.NestedExpressionAtomContext; +import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.QualifiedNameContext; +import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.TableNameContext; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor; 
import java.util.Arrays; import java.util.stream.Collectors; +import org.antlr.v4.runtime.tree.RuleNode; /** * Expression builder to parse text to expression in AST. */ public class AstExpressionBuilder extends OpenDistroSQLParserBaseVisitor { + @Override + public UnresolvedExpression visitTableName(TableNameContext ctx) { + return new QualifiedName(visitQualifiedNameText(ctx)); + } + + @Override + public UnresolvedExpression visitIdent(IdentContext ctx) { + return new QualifiedName(visitQualifiedNameText(ctx)); + } + + @Override + public UnresolvedExpression visitQualifiedName(QualifiedNameContext ctx) { + return new QualifiedName( + ctx.ident() + .stream() + .map(this::visitQualifiedNameText) + .collect(Collectors.toList()) + ); + } + @Override public UnresolvedExpression visitMathExpressionAtom(MathExpressionAtomContext ctx) { return new Function( @@ -99,4 +124,8 @@ public UnresolvedExpression visitTimestampLiteral( return AstDSL.timestampLiteral(unquoteIdentifier(ctx.timestamp.getText())); } + private String visitQualifiedNameText(RuleNode node) { + return unquoteIdentifier(node.getText()); + } + } diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java index 7bd8d93b2b..6d3dee9585 100644 --- a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -20,7 +20,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import com.amazon.opendistroforelasticsearch.sql.common.antlr.SyntaxCheckException; -import org.antlr.v4.runtime.tree.ParseTree; import org.junit.jupiter.api.Test; class SQLSyntaxParserTest { @@ -29,13 +28,48 @@ class SQLSyntaxParserTest { @Test public void canParseSelectLiterals() { - ParseTree parseTree = parser.parse("SELECT 123, 'hello'"); - 
assertNotNull(parseTree); + assertNotNull(parser.parse("SELECT 123, 'hello'")); + } + + @Test + public void canParseIndexNameWithDate() { + assertNotNull(parser.parse("SELECT * FROM logs_2020_01")); + assertNotNull(parser.parse("SELECT * FROM logs-2020-01")); + } + + @Test + public void canParseHiddenIndexName() { + assertNotNull(parser.parse("SELECT * FROM .kibana")); + } + + @Test + public void canNotParseIndexNameWithSpecialChar() { + assertThrows(SyntaxCheckException.class, + () -> parser.parse("SELECT * FROM hello+world")); + } + + @Test + public void canParseIndexNameWithSpecialCharQuoted() { + assertNotNull(parser.parse("SELECT * FROM `hello+world`")); + assertNotNull(parser.parse("SELECT * FROM \"hello$world\"")); + } + + @Test + public void canNotParseIndexNameStartingWithNumber() { + assertThrows(SyntaxCheckException.class, + () -> parser.parse("SELECT * FROM 123test")); + } + + @Test + public void canNotParseIndexNameSingleQuoted() { + assertThrows(SyntaxCheckException.class, + () -> parser.parse("SELECT * FROM 'test'")); } @Test public void canNotParseInvalidSelect() { - assertThrows(SyntaxCheckException.class, () -> parser.parse("SELECT * FROM test")); + assertThrows(SyntaxCheckException.class, + () -> parser.parse("SELECT * FROM test WHERE age = 10")); } } \ No newline at end of file diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java index a5546db97f..5f047b22b5 100644 --- a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java @@ -20,12 +20,15 @@ import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.doubleLiteral; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.intLiteral; import static 
com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.project; +import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.relation; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.stringLiteral; import static com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL.values; import static java.util.Collections.emptyList; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import com.amazon.opendistroforelasticsearch.sql.ast.tree.UnresolvedPlan; +import com.amazon.opendistroforelasticsearch.sql.common.antlr.SyntaxCheckException; import com.amazon.opendistroforelasticsearch.sql.sql.antlr.SQLSyntaxParser; import org.antlr.v4.runtime.tree.ParseTree; import org.junit.jupiter.api.Test; @@ -43,7 +46,7 @@ class AstBuilderTest { private final AstBuilder astBuilder = new AstBuilder(); @Test - public void buildASTForSelectLiterals() { + public void canBuildSelectLiterals() { assertEquals( project( values(emptyList()), @@ -56,6 +59,24 @@ public void buildASTForSelectLiterals() { ); } + @Test + public void canBuildSelectAllFromIndex() { + assertEquals( + relation("test"), + buildAST("SELECT * FROM test") + ); + + assertThrows(SyntaxCheckException.class, () -> buildAST("SELECT *")); + } + + @Test + public void buildSelectFieldsFromIndex() { // TODO: change to select fields later + assertEquals( + project(relation("test"), intLiteral(1)), + buildAST("SELECT 1 FROM test") + ); + } + private UnresolvedPlan buildAST(String query) { ParseTree parseTree = parser.parse(query); return parseTree.accept(astBuilder); diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstQualifiedNameBuilderTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstQualifiedNameBuilderTest.java new file mode 100644 index 0000000000..e8c1506e7d --- /dev/null +++ 
b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstQualifiedNameBuilderTest.java @@ -0,0 +1,95 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * + */ + +package com.amazon.opendistroforelasticsearch.sql.sql.parser; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.amazon.opendistroforelasticsearch.sql.ast.dsl.AstDSL; +import com.amazon.opendistroforelasticsearch.sql.ast.expression.UnresolvedExpression; +import com.amazon.opendistroforelasticsearch.sql.common.antlr.CaseInsensitiveCharStream; +import com.amazon.opendistroforelasticsearch.sql.common.antlr.SyntaxAnalysisErrorListener; +import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLLexer; +import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser; +import java.util.function.Function; +import lombok.RequiredArgsConstructor; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.RuleNode; +import org.junit.jupiter.api.Test; + +public class AstQualifiedNameBuilderTest { + + @Test + public void canBuildRegularIdentifierForSQLStandard() { + buildFromIdentifier("test").expectQualifiedName("test"); + buildFromIdentifier("test123").expectQualifiedName("test123"); + buildFromIdentifier("test_123").expectQualifiedName("test_123"); + } + + @Test + public void canBuildRegularIdentifierForElasticsearch() { + 
buildFromTableName(".kibana").expectQualifiedName(".kibana"); + //buildFromIdentifier("@timestamp").expectQualifiedName("@timestamp");//TODO: field name + buildFromIdentifier("logs-2020-01").expectQualifiedName("logs-2020-01"); + buildFromIdentifier("*logs*").expectQualifiedName("*logs*"); + } + + @Test + public void canBuildDelimitedIdentifier() { + buildFromIdentifier("\"hello$world\"").expectQualifiedName("hello$world"); + buildFromIdentifier("`logs.2020.01`").expectQualifiedName("logs.2020.01"); + } + + @Test + public void canBuildQualifiedIdentifier() { + buildFromQualifiers("account.location.city").expectQualifiedName("account", "location", "city"); + } + + private AstExpressionBuilderAssertion buildFromIdentifier(String expr) { + return new AstExpressionBuilderAssertion(OpenDistroSQLParser::ident, expr); + } + + private AstExpressionBuilderAssertion buildFromQualifiers(String expr) { + return new AstExpressionBuilderAssertion(OpenDistroSQLParser::qualifiedName, expr); + } + + private AstExpressionBuilderAssertion buildFromTableName(String expr) { + return new AstExpressionBuilderAssertion(OpenDistroSQLParser::tableName, expr); + } + + @RequiredArgsConstructor + private static class AstExpressionBuilderAssertion { + private final AstExpressionBuilder astExprBuilder = new AstExpressionBuilder(); + private final Function build; + private final String actual; + + public void expectQualifiedName(String... expected) { + assertEquals(AstDSL.qualifiedName(expected), buildExpression(actual)); + } + + private UnresolvedExpression buildExpression(String expr) { + return build.apply(createParser(expr)).accept(astExprBuilder); + } + + private OpenDistroSQLParser createParser(String expr) { + OpenDistroSQLLexer lexer = new OpenDistroSQLLexer(new CaseInsensitiveCharStream(expr)); + OpenDistroSQLParser parser = new OpenDistroSQLParser(new CommonTokenStream(lexer)); + parser.addErrorListener(new SyntaxAnalysisErrorListener()); + return parser; + } + } + +}