Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
Merge branch 'develop' into sql-error-message
Browse files Browse the repository at this point in the history
  • Loading branch information
dai-chen committed Jan 26, 2021
2 parents f0297e3 + c5ea315 commit d8c3deb
Show file tree
Hide file tree
Showing 28 changed files with 280 additions and 104 deletions.
18 changes: 1 addition & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,7 @@ Please refer to the [SQL Language Reference Manual](./docs/user/index.rst), [Pip

## Experimental

Recently we have been actively improving our query engine primarily for better correctness and extensibility. The new enhanced query engine has been already supporting the new released Piped Processing Language query processing behind the scene. Meanwhile, the integration with SQL language is also under way. To try out the power of the new query engine with SQL, simply run the command to enable it by [plugin setting](https://github.com/opendistro-for-elasticsearch/sql/blob/develop/docs/user/admin/settings.rst#opendistro-sql-engine-new-enabled). In future release, this will be enabled by default and nothing required to do from your side. Please stay tuned for updates on our progress and its new exciting features.

Here is a documentation list with features only available in this improved SQL query engine. Please follow the instruction above to enable it before trying out example queries in these docs:

* [Identifiers](./docs/user/general/identifiers.rst): support for identifier names with special characters
* [Data types](./docs/user/general/datatypes.rst): new data types such as date time and interval
* [Expressions](./docs/user/dql/expressions.rst): new expression system that can represent and evaluate complex expressions
* [SQL functions](./docs/user/dql/functions.rst): many more string and date functions added
* [Basic queries](./docs/user/dql/basics.rst)
* Ordering by Aggregate Functions section
* NULLS FIRST/LAST in section Specifying Order for Null
* [Aggregations](./docs/user/dql/aggregations.rst): aggregation over expression and more other features
* [Complex queries](./docs/user/dql/complex.rst)
* Improvement on Subqueries in FROM clause
* [Window functions](./docs/user/dql/window.rst): ranking and aggregate window function support

To avoid impact on your side, normally you won't see any difference in query response. If you want to check if and why your query falls back to be handled by old SQL engine, please explain your query and check Elasticsearch log for "Request is falling back to old SQL engine due to ...".
Recently we have been actively improving our query engine, primarily for better correctness and extensibility. Behind the scenes, the new enhanced engine already supports the newly released Piped Processing Language. However, it was experimental and disabled by default for SQL query processing. With the most important features and full testing complete, we are now ready to promote it to be our default SQL query engine. Please find more details in [An Introduction to the New SQL Query Engine](/docs/dev/NewSQLEngine.md).


## Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,10 @@ public FunctionExpression nullif(Expression... expressions) {
return function(BuiltinFunctionName.NULLIF, expressions);
}

/**
 * Builds a function expression for the built-in {@code IF} function.
 *
 * @param expressions the arguments handed to the function unchanged; callers in the tests
 *                    pass them as (condition, value-if-true, value-otherwise)
 * @return whatever {@code function(...)} produces for {@link BuiltinFunctionName#IF}
 */
public FunctionExpression iffunction(Expression... expressions) {
  final BuiltinFunctionName ifFunctionName = BuiltinFunctionName.IF;
  return function(ifFunctionName, expressions);
}

public static Expression cases(Expression defaultResult,
WhenClause... whenClauses) {
return new CaseClause(Arrays.asList(whenClauses), defaultResult);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ public enum BuiltinFunctionName {
IS_NULL(FunctionName.of("is null")),
IS_NOT_NULL(FunctionName.of("is not null")),
IFNULL(FunctionName.of("ifnull")),
IF(FunctionName.of("if")),
NULLIF(FunctionName.of("nullif")),
ISNULL(FunctionName.of("isnull")),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

package com.amazon.opendistroforelasticsearch.sql.expression.operator.predicate;

import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_FALSE;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_MISSING;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_NULL;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;

Expand Down Expand Up @@ -59,6 +57,7 @@ public static void register(BuiltinFunctionRepository repository) {
repository.register(nullIf());
repository.register(isNull(BuiltinFunctionName.IS_NULL));
repository.register(isNull(BuiltinFunctionName.ISNULL));
repository.register(ifFunction());
}

private static FunctionResolver not() {
Expand Down Expand Up @@ -100,21 +99,28 @@ private static FunctionResolver isNotNull() {
Collectors.toList()));
}

private static FunctionResolver ifNull() {
FunctionName functionName = BuiltinFunctionName.IFNULL.getName();
private static FunctionResolver ifFunction() {
FunctionName functionName = BuiltinFunctionName.IF.getName();
List<ExprCoreType> typeList = ExprCoreType.coreTypes();

List<SerializableFunction<FunctionName, org.apache.commons.lang3.tuple.Pair<FunctionSignature,
FunctionBuilder>>> functionsOne = typeList.stream().map(v ->
impl((UnaryPredicateOperator::exprIfNull), v, v, v))
impl((UnaryPredicateOperator::exprIf), v, BOOLEAN, v, v))
.collect(Collectors.toList());

FunctionResolver functionResolver = FunctionDSL.define(functionName, functionsOne);
return functionResolver;
}

private static FunctionResolver ifNull() {
FunctionName functionName = BuiltinFunctionName.IFNULL.getName();
List<ExprCoreType> typeList = ExprCoreType.coreTypes();

List<SerializableFunction<FunctionName, org.apache.commons.lang3.tuple.Pair<FunctionSignature,
FunctionBuilder>>> functionsTwo = typeList.stream().map(v ->
impl((UnaryPredicateOperator::exprIfNull), v, UNKNOWN, v))
FunctionBuilder>>> functionsOne = typeList.stream().map(v ->
impl((UnaryPredicateOperator::exprIfNull), v, v, v))
.collect(Collectors.toList());

functionsOne.addAll(functionsTwo);
FunctionResolver functionResolver = FunctionDSL.define(functionName, functionsOne);
return functionResolver;
}
Expand Down Expand Up @@ -149,4 +155,8 @@ public static ExprValue exprNullIf(ExprValue v1, ExprValue v2) {
return v1.equals(v2) ? LITERAL_NULL : v1;
}

/**
 * Selects one of two values depending on a condition value.
 *
 * @param v1 the condition
 * @param v2 the result when {@code v1} is neither NULL nor MISSING and equals
 *           {@code LITERAL_TRUE}
 * @param v3 the result in every other case
 * @return {@code v2} or {@code v3}, as described above
 */
public static ExprValue exprIf(ExprValue v1, ExprValue v2, ExprValue v3) {
  // A NULL or MISSING condition is never treated as true.
  if (v1.isNull() || v1.isMissing()) {
    return v3;
  }
  return LITERAL_TRUE.equals(v1) ? v2 : v3;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ private static Stream<Arguments> isNullArguments() {
private static Stream<Arguments> ifNullArguments() {
ArrayList<Expression> exprValueArrayList = new ArrayList<>();
exprValueArrayList.add(DSL.literal(123));
exprValueArrayList.add(DSL.literal("test"));
exprValueArrayList.add(DSL.literal(LITERAL_NULL));
exprValueArrayList.add(DSL.literal(321));
exprValueArrayList.add(DSL.literal(""));
exprValueArrayList.add(DSL.literal(LITERAL_NULL));

return Lists.cartesianProduct(exprValueArrayList, exprValueArrayList).stream()
.map(list -> {
Expand Down Expand Up @@ -115,12 +115,30 @@ private static Stream<Arguments> nullIfArguments() {
});
}

/**
 * Supplies the arguments for {@code test_if_predicate}: one case per condition literal
 * (TRUE, FALSE, NULL, MISSING), each combined with the fixed branches "123" and "321".
 * Only a TRUE condition expects the first branch; every other condition expects the second.
 *
 * @return a stream of (condition, first branch, second branch, expected result) arguments
 */
private static Stream<Arguments> ifArguments() {
  // Only the condition varies, so stream it directly. Building the cartesian product of the
  // condition list with itself merely repeated each case without adding any coverage,
  // because the second element of every pair was never read.
  return Stream.<Expression>of(
          DSL.literal(LITERAL_TRUE),
          DSL.literal(LITERAL_FALSE),
          DSL.literal(LITERAL_NULL),
          DSL.literal(LITERAL_MISSING))
      .map(condition -> {
        // Compare by value with equals() instead of by reference with ==; a null operand
        // simply compares unequal.
        boolean conditionIsTrue =
            LITERAL_TRUE.value().equals(condition.valueOf(valueEnv()).value());
        return Arguments.of(
            condition,
            DSL.literal("123"),
            DSL.literal("321"),
            DSL.literal(conditionIsTrue ? "123" : "321"));
      });
}

private static Stream<Arguments> exprIfNullArguments() {
ArrayList<ExprValue> exprValues = new ArrayList<>();
exprValues.add(LITERAL_NULL);
exprValues.add(LITERAL_MISSING);
exprValues.add(ExprValueUtils.integerValue(123));
exprValues.add(ExprValueUtils.stringValue("test"));
exprValues.add(ExprValueUtils.integerValue(456));

return Lists.cartesianProduct(exprValues, exprValues).stream()
.map(list -> {
Expand Down Expand Up @@ -200,18 +218,24 @@ public void test_ifnull_predicate(Expression v1, Expression v2, Expression expec
assertEquals(expected.valueOf(valueEnv()), dsl.ifnull(v1, v2).valueOf(valueEnv()));
}

/**
 * Checks that {@code UnaryPredicateOperator.exprIfNull(v1, v2)} yields the same underlying
 * value as {@code expected}, for every case supplied by {@code exprIfNullArguments}.
 */
@ParameterizedTest
@MethodSource("exprIfNullArguments")
public void test_exprIfNull_predicate(ExprValue v1, ExprValue v2, ExprValue expected) {
  Object expectedValue = expected.value();
  Object actualValue = UnaryPredicateOperator.exprIfNull(v1, v2).value();
  assertEquals(expectedValue, actualValue);
}

/**
 * Checks that the DSL {@code nullif} expression built from {@code v1} and {@code v2}
 * evaluates to the same value as {@code expected}, for every case supplied by
 * {@code nullIfArguments}.
 */
@ParameterizedTest
@MethodSource("nullIfArguments")
public void test_nullif_predicate(Expression v1, Expression v2, Expression expected) {
  ExprValue expectedValue = expected.valueOf(valueEnv());
  ExprValue actualValue = dsl.nullif(v1, v2).valueOf(valueEnv());
  assertEquals(expectedValue, actualValue);
}

/**
 * Checks that the DSL {@code iffunction} expression built from {@code v1}, {@code v2} and
 * {@code v3} evaluates to the same value as {@code expected}, for every case supplied by
 * {@code ifArguments}.
 */
@ParameterizedTest
@MethodSource("ifArguments")
public void test_if_predicate(Expression v1, Expression v2, Expression v3, Expression expected) {
  ExprValue expectedValue = expected.valueOf(valueEnv());
  ExprValue actualValue = dsl.iffunction(v1, v2, v3).valueOf(valueEnv());
  assertEquals(expectedValue, actualValue);
}

/**
 * Checks that {@code UnaryPredicateOperator.exprIfNull(v1, v2)} yields the same underlying
 * value as {@code expected}, for every case supplied by {@code exprIfNullArguments}.
 */
@ParameterizedTest
@MethodSource("exprIfNullArguments")
public void test_exprIfNull_predicate(ExprValue v1, ExprValue v2, ExprValue expected) {
  Object expectedValue = expected.value();
  Object actualValue = UnaryPredicateOperator.exprIfNull(v1, v2).value();
  assertEquals(expectedValue, actualValue);
}

@ParameterizedTest
@MethodSource("exprNullIfArguments")
public void test_exprNullIf_predicate(ExprValue v1, ExprValue v2, ExprValue expected) {
Expand Down
73 changes: 73 additions & 0 deletions docs/dev/NewSQLEngine.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# An Introduction to the New SQL Query Engine

---
## 1.Motivations

The current SQL query engine provides users with basic query capability using familiar SQL rather than the complex Elasticsearch DSL. Built on NLPchina ES-SQL, it has gained many new features, such as a semantic analyzer, semi-structured data query support, Hash Join, etc. However, as we looked into more advanced SQL features, challenges started to emerge, especially in terms of correctness and extensibility (see [Attributions](../attributions.md)). After careful consideration, we decided to develop a new query engine to address all the problems encountered so far.


---
## 2.What's New

With the architecture and extensibility improved significantly, the following SQL features are able to be introduced in the new query engine:

* [Identifiers](/docs/user/general/identifiers.rst): Support for identifier names with special characters
* [Data types](/docs/user/general/datatypes.rst): New data types such as date time and interval
* [Expressions](/docs/user/dql/expressions.rst): New expression system that can represent and evaluate complex expressions
* [SQL functions](/docs/user/dql/functions.rst): Many more string and date functions added
* [Basic queries](/docs/user/dql/basics.rst)
* Ordering by Aggregate Functions section
* NULLS FIRST/LAST in section Specifying Order for Null
* [Aggregations](/docs/user/dql/aggregations.rst):
* Aggregation over expression
* Selective aggregation by FILTER function
* [Complex queries](/docs/user/dql/complex.rst)
* Improvement on Subqueries in FROM clause
* [Window functions](/docs/user/dql/window.rst)
* Ranking window functions
* Aggregate window functions

As for correctness, besides full coverage by unit and integration tests, we developed a new comparison test framework to ensure correctness by comparing with other databases. Please find more details in [Testing](./Testing.md).


---
## 3.What's Changed

### 3.1 Breaking Changes

Because the implementation has changed internally, you can expect Explain output in a different format. For the query protocol, there are slight changes to the values of two fields in the default response format:

* **Schema**: Previously the `name` and `alias` value differed for different queries. For consistency, name is always the original text now and alias is its alias defined in SELECT clause or absent if none.
* **Total**: The `total` field represented how many documents matched in total, no matter how many were returned (indicated by the `size` field). However, this field becomes meaningless because of post-processing on the DSL response in the new query engine. Thus, for now the total number is always the same as the `size` field.

### 3.2 Limitations

You can find all the limitations in [Limitations](/docs/user/limitations/limitations.rst). For these unsupported features, the query will be forwarded to the old query engine by a fallback mechanism. To avoid impact on your side, normally you won't see any difference in the query response. If you want to check whether and why your query falls back to the old SQL engine, please explain your query and check the Elasticsearch log for "Request is falling back to old SQL engine due to ...".

Basically, here is a list of common features that are not supported in the new query engine yet:

* **Cursor**: request with `fetch_size` parameter
* **JSON response format**: will not be supported anymore in the new engine
* **Nested field query**: including supports for object field or nested field query
* **JOINs**: including all types of join queries
* **Elasticsearch functions**: fulltext search, metric and bucket functions

### 3.3 What if Something Goes Wrong

Don't panic! You can easily roll back to the old query engine by changing a plugin setting. Simply run the command to disable it by [plugin setting](/docs/user/admin/settings.rst#opendistro-sql-engine-new-enabled). As with any other cluster setting change, there is no need to restart Elasticsearch, and the change will take effect on the next incoming query. Afterwards, please report the issue to us.


---
## 4.How it's Implemented

If you're interested in the new query engine, please find more details in [Develop Guide](../developing.rst), [Architecture](./Architecture.md) and other docs in the dev folder.


---
## 5.What's Next

As mentioned in section 3.2 Limitations, there are still very popular SQL features not yet supported in the new query engine. In particular, the following items are on our roadmap with high priority:

1. Object/Nested field queries
2. JOIN support
3. Elasticsearch functions
36 changes: 36 additions & 0 deletions docs/experiment/ppl/functions/condition.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,39 @@ Example::
| False | Quility | Nanette |
| True | null | Dale |
+----------+------------+-------------+

IF
------

Description
>>>>>>>>>>>

Usage: if(condition, expr1, expr2) returns expr1 if condition is true, otherwise returns expr2.

Argument type: all the supported data types (NOTE: if expr1 and expr2 have different types, the query will fail the semantic check).

Return type: any

Example::

od> source=accounts | eval result = if(true, firstname, lastname) | fields result, firstname, lastname
fetched rows / total rows = 4/4
+----------+-------------+------------+
| result | firstname | lastname |
|----------+-------------+------------|
| Amber | Amber | Duke |
| Hattie | Hattie | Bond |
| Nanette | Nanette | Bates |
| Dale | Dale | Adams |
+----------+-------------+------------+

od> source=accounts | eval result = if(false, firstname, lastname) | fields result, firstname, lastname
fetched rows / total rows = 4/4
+----------+-------------+------------+
| result | firstname | lastname |
|----------+-------------+------------|
| Duke | Amber | Duke |
| Bond | Hattie | Bond |
| Bates | Nanette | Bates |
| Adams | Dale | Adams |
+----------+-------------+------------+
6 changes: 3 additions & 3 deletions docs/user/admin/settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ Description

We are migrating existing functionalities to a new query engine under development. Users can choose to enable the new engine if interested, or disable it if any issue is found.

1. The default value is false.
1. The default value is true.
2. This setting is node scope.
3. This setting can be updated dynamically.

Expand All @@ -532,7 +532,7 @@ SQL query::

>> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_opendistro/_sql/settings -d '{
"transient" : {
"opendistro.sql.engine.new.enabled" : "true"
"opendistro.sql.engine.new.enabled" : "false"
}
}'

Expand All @@ -546,7 +546,7 @@ Result set::
"sql" : {
"engine" : {
"new" : {
"enabled" : "true"
"enabled" : "false"
}
}
}
Expand Down
34 changes: 34 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1982,6 +1982,40 @@ Example::
| True | False |
+---------------+---------------+

IF
------

Description
>>>>>>>>>>>

Specifications:

1. IF(condition, ES_TYPE1, ES_TYPE2) -> ES_TYPE1 or ES_TYPE2

Usage: if first parameter is true, return second parameter, otherwise return third one.

Argument type: condition as BOOLEAN, second and third can be any type

Return type: Any (NOTE: if parameters #2 and #3 have different types, the query will fail the semantic check)

Example::

od> SELECT IF(100 > 200, '100', '200')
fetched rows / total rows = 1/1
+-------------------------------+
| IF(100 > 200, '100', '200') |
|-------------------------------|
| 200 |
+-------------------------------+

od> SELECT IF(200 > 100, '100', '200')
fetched rows / total rows = 1/1
+-------------------------------+
| IF(200 > 100, '100', '200') |
|-------------------------------|
| 100 |
+-------------------------------+

CASE
----

Expand Down
3 changes: 0 additions & 3 deletions doctest/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,4 @@ def load_tests(loader, suite, ignore):
# randomize order of tests to make sure they don't depend on each other
random.shuffle(tests)

# prepend a temporary doc to enable new engine so new SQL docs followed can pass
tests.insert(0, doc_suite('../docs/user/dql/newsql.rst'))

return DocTests(tests)
6 changes: 3 additions & 3 deletions integ-test/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ integTest {
systemProperty "user", System.getProperty("user")
systemProperty "password", System.getProperty("password")

// Explicitly disable the new SQL engine for this task
systemProperty 'enableNewEngine', 'false'

// Set default query size limit
systemProperty 'defaultQuerySizeLimit', '10000'

Expand Down Expand Up @@ -109,9 +112,6 @@ task integTestWithNewEngine(type: RestIntegTestTask) {
systemProperty "user", System.getProperty("user")
systemProperty "password", System.getProperty("password")

// Enable new SQL engine
systemProperty 'enableNewEngine', 'true'

// Set default query size limit
systemProperty 'defaultQuerySizeLimit', '10000'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import com.amazon.opendistroforelasticsearch.sql.legacy.utils.StringUtils;
import java.io.IOException;
import org.junit.Assume;
import org.junit.Ignore;
import org.junit.Test;

public class OrdinalAliasRewriterIT extends SQLIntegTestCase {
Expand Down
Loading

0 comments on commit d8c3deb

Please sign in to comment.