From f373020349a7d3ab2a7fefa7ebe07f69b64d5f5a Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Thu, 30 Jan 2020 14:48:34 +0100 Subject: [PATCH] SQL: Fix ORDER BY YEAR() function (#51562) Previously, if YEAR() was used as and ORDER BY argument without being wrapped with another scalar (e.g. YEAR(birth_date) + 10), no script ordering was used but instead the underlying field (e.g. birth_date) was used instead as a performance optimisation. This works correctly if YEAR() is the only ORDER BY arg but if further args are used as tie breakers for the ordering wrong results are produced. This is because 2 rows with the different birth_date but on the same year are not tied as the underlying ordering is on birth_date and not on the YEAR(birth_date), and the following ORDER BY args are ignored. Remove this optimisation for YEAR() to avoid incorrect results in such cases. As a consequence another bug is revealed: scalar functions on top of nested fields produce scripted sorting/filtering which is not yet supported. In such cases no error was thrown but instead all values for such nested fields were null and were passed to the script implementing the sorting/filtering, producing incorrect results. Detect such cases and throw a validation exception. Fixes: #51224 (cherry picked from commit f41efd6753dc3650a7eabb3e07b02b3b32c5704c) --- docs/reference/sql/limitations.asciidoc | 28 ++++++++++ .../function/scalar/ScalarFunction.java | 11 +--- .../qa/src/main/resources/datetime.sql-spec | 6 ++ .../qa/src/main/resources/docs/docs.csv-spec | 2 +- .../sql/qa/src/main/resources/nested.csv-spec | 24 ++------ .../xpack/sql/analysis/analyzer/Verifier.java | 55 +++++++++++++++---- .../function/scalar/datetime/Year.java | 5 -- .../xpack/sql/planner/QueryFolder.java | 17 +----- .../analyzer/VerifierErrorMessagesTests.java | 35 ++++++++++++ .../sql/planner/QueryTranslatorTests.java | 14 +++++ 10 files changed, 137 insertions(+), 60 deletions(-) diff --git a/docs/reference/sql/limitations.asciidoc b/docs/reference/sql/limitations.asciidoc index 07bd3ec9142a4..328c11e1ae2f3 100644 --- a/docs/reference/sql/limitations.asciidoc +++ b/docs/reference/sql/limitations.asciidoc @@ -31,6 +31,34 @@ For example: SELECT dep.dep_name.keyword FROM test_emp GROUP BY languages; -------------------------------------------------- +[float] +=== Scalar functions on nested fields are not allowed in `WHERE` and `ORDER BY` clauses +{es-sql} doesn't support the usage of scalar functions on top of nested fields in `WHERE` +and `ORDER BY` clauses with the exception of comparison and logical operators. + +For example: + +[source, sql] +-------------------------------------------------- +SELECT * FROM test_emp WHERE LENGTH(dep.dep_name.keyword) > 5; +-------------------------------------------------- + +and + +[source, sql] +-------------------------------------------------- +SELECT * FROM test_emp ORDER BY YEAR(dep.start_date); +-------------------------------------------------- + +are not supported but: + +[source, sql] +-------------------------------------------------- +SELECT * FROM test_emp WHERE dep.start_date >= CAST('2020-01-01' AS DATE) OR dep.dep_end_date IS NULL; +-------------------------------------------------- + +is supported. + [float] === Multi-nested fields diff --git a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java index bcb3cbcabcaf9..823465b28b271 100644 --- a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java +++ b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java @@ -39,17 +39,9 @@ protected ScalarFunction(Source source, List fields) { super(source, fields); } - // used if the function is monotonic and thus does not have to be computed for ordering purposes - // null means the script needs to be used; expression means the field/expression to be used instead - public Expression orderBy() { - return null; - } - - // // Script generation // - public ScriptTemplate asScript(Expression exp) { if (exp.foldable()) { return scriptWithFoldable(exp); @@ -73,7 +65,6 @@ public ScriptTemplate asScript(Expression exp) { throw new QlIllegalArgumentException("Cannot evaluate script for expression {}", exp); } - protected ScriptTemplate scriptWithFoldable(Expression foldable) { Object fold = foldable.fold(); @@ -144,4 +135,4 @@ protected String processScript(String script) { protected String formatTemplate(String template) { return Scripts.formatTemplate(template); } -} \ No newline at end of file +} diff --git a/x-pack/plugin/sql/qa/src/main/resources/datetime.sql-spec b/x-pack/plugin/sql/qa/src/main/resources/datetime.sql-spec index a0982d2483732..553d5bc5e16cd 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/datetime.sql-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/datetime.sql-spec @@ -128,6 +128,12 @@ SELECT first_name FROM test_emp ORDER BY NOW(), first_name NULLS LAST LIMIT 5; groupByCurrentTimestamp SELECT MAX(salary) AS max FROM test_emp GROUP BY NOW(); +// +// ORDER BY +// +orderByYear +SELECT YEAR(birth_date) as year, emp_no FROM test_emp ORDER BY year NULLS FIRST, emp_no ASC; + // // H2 uses the local timezone instead of the specified one // diff --git a/x-pack/plugin/sql/qa/src/main/resources/docs/docs.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/docs/docs.csv-spec index 4567e60ef761b..9a59c6161bc72 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/docs/docs.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/docs/docs.csv-spec @@ -939,7 +939,7 @@ SELECT -2 * INTERVAL '3' YEARS AS result; /////////////////////////////// // -// Order by +// Order By // /////////////////////////////// diff --git a/x-pack/plugin/sql/qa/src/main/resources/nested.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/nested.csv-spec index 5f50a6cbe5bf6..ccc864bc1425e 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/nested.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/nested.csv-spec @@ -102,27 +102,15 @@ Mayuko | 12 ; selectWithScalarOnNested -SELECT first_name f, last_name l, YEAR(dep.from_date) start FROM test_emp WHERE dep.dep_name = 'Production' AND languages > 1 ORDER BY start LIMIT 5; +SELECT first_name f, last_name l, YEAR(dep.from_date) start FROM test_emp WHERE dep.dep_name = 'Production' AND languages > 1 ORDER BY dep.from_date LIMIT 5; f:s | l:s | start:i -Sreekrishna |Servieres |1985 -Zhongwei |Rosen |1986 -Chirstian |Koblick |1986 -null |Chappelet |1988 -Zvonko |Nyanchama |1989 -; - -selectWithScalarOnNestedWithoutProjection -SELECT first_name f, last_name l FROM test_emp WHERE dep.dep_name = 'Production' AND languages > 1 ORDER BY YEAR(dep.from_date) LIMIT 5; - -f:s | l:s - -Sreekrishna |Servieres -Zhongwei |Rosen -Chirstian |Koblick -null |Chappelet -Zvonko |Nyanchama +Sreekrishna |Servieres |1985 +Zhongwei |Rosen |1986 +Chirstian |Koblick |1986 +null |Chappelet |1988 +Zvonko |Nyanchama |1989 ; // diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java index b2818ee21454f..36053bf6db4ed 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java @@ -23,6 +23,9 @@ import org.elasticsearch.xpack.ql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.ql.expression.function.scalar.ScalarFunction; import org.elasticsearch.xpack.ql.expression.predicate.fulltext.FullTextPredicate; +import org.elasticsearch.xpack.ql.expression.predicate.logical.BinaryLogic; +import org.elasticsearch.xpack.ql.expression.predicate.logical.Not; +import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.ql.plan.logical.Aggregate; import org.elasticsearch.xpack.ql.plan.logical.Filter; import org.elasticsearch.xpack.ql.plan.logical.Limit; @@ -40,6 +43,8 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.Max; import org.elasticsearch.xpack.sql.expression.function.aggregate.Min; import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; +import org.elasticsearch.xpack.sql.expression.predicate.nulls.IsNotNull; +import org.elasticsearch.xpack.sql.expression.predicate.nulls.IsNull; import org.elasticsearch.xpack.sql.plan.logical.Distinct; import org.elasticsearch.xpack.sql.plan.logical.LocalRelation; import org.elasticsearch.xpack.sql.plan.logical.Pivot; @@ -255,7 +260,7 @@ Collection verify(LogicalPlan plan) { } checkForScoreInsideFunctions(p, localFailures); - checkNestedUsedInGroupByOrHaving(p, localFailures); + checkNestedUsedInGroupByOrHavingOrWhereOrOrderBy(p, localFailures, attributeRefs); checkForGeoFunctionsOnDocValues(p, localFailures); checkPivot(p, localFailures, attributeRefs); @@ -735,17 +740,21 @@ private static void checkForScoreInsideFunctions(LogicalPlan p, Set loc Function.class)); } - private static void checkNestedUsedInGroupByOrHaving(LogicalPlan p, Set localFailures) { + private static void checkNestedUsedInGroupByOrHavingOrWhereOrOrderBy(LogicalPlan p, Set localFailures, + AttributeMap attributeRefs) { List nested = new ArrayList<>(); - Consumer match = fa -> { + Consumer matchNested = fa -> { if (fa.isNested()) { nested.add(fa); } }; + Consumer checkForNested = e -> + attributeRefs.getOrDefault(e, e).forEachUp(matchNested, FieldAttribute.class); + Consumer checkForNestedInFunction = f -> f.arguments().forEach( + arg -> arg.forEachUp(matchNested, FieldAttribute.class)); // nested fields shouldn't be used in aggregates or having (yet) - p.forEachDown(a -> a.groupings().forEach(agg -> agg.forEachUp(match, FieldAttribute.class)), Aggregate.class); - + p.forEachDown(a -> a.groupings().forEach(agg -> agg.forEachUp(checkForNested)), Aggregate.class); if (!nested.isEmpty()) { localFailures.add( fail(nested.get(0), "Grouping isn't (yet) compatible with nested fields " + new AttributeSet(nested).names())); @@ -753,15 +762,39 @@ private static void checkNestedUsedInGroupByOrHaving(LogicalPlan p, Set } // check in having - p.forEachDown(f -> { - if (f.child() instanceof Aggregate) { - f.condition().forEachUp(match, FieldAttribute.class); - } - }, Filter.class); - + p.forEachDown(f -> f.forEachDown(a -> f.condition().forEachUp(checkForNested), Aggregate.class), Filter.class); if (!nested.isEmpty()) { localFailures.add( fail(nested.get(0), "HAVING isn't (yet) compatible with nested fields " + new AttributeSet(nested).names())); + nested.clear(); + } + + // check in where (scalars not allowed) + p.forEachDown(f -> f.condition().forEachUp(e -> + attributeRefs.getOrDefault(e, e).forEachUp(sf -> { + if (sf instanceof BinaryComparison == false && + sf instanceof IsNull == false && + sf instanceof IsNotNull == false && + sf instanceof Not == false && + sf instanceof BinaryLogic== false) { + checkForNestedInFunction.accept(sf); + }}, ScalarFunction.class) + ), Filter.class); + if (!nested.isEmpty()) { + localFailures.add( + fail(nested.get(0), "WHERE isn't (yet) compatible with scalar functions on nested fields " + + new AttributeSet(nested).names())); + nested.clear(); + } + + // check in order by (scalars not allowed) + p.forEachDown(ob -> ob.order().forEach(o -> o.forEachUp(e -> + attributeRefs.getOrDefault(e, e).forEachUp(checkForNestedInFunction, ScalarFunction.class) + )), OrderBy.class); + if (!nested.isEmpty()) { + localFailures.add( + fail(nested.get(0), "ORDER BY isn't (yet) compatible with scalar functions on nested fields " + + new AttributeSet(nested).names())); } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java index e97689c6b9e22..fd48fedd7f4c4 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java @@ -39,11 +39,6 @@ public String dateTimeFormat() { return "year"; } - @Override - public Expression orderBy() { - return field(); - } - @Override public String calendarInterval() { return YEAR_INTERVAL; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java index 335df0603b148..ecc86ce7a2f38 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java @@ -695,21 +695,8 @@ protected PhysicalPlan rule(OrderExec plan) { // scalar functions typically require script ordering if (orderExpression instanceof ScalarFunction) { ScalarFunction sf = (ScalarFunction) orderExpression; - // is there an expression to order by? - if (sf.orderBy() != null) { - Expression orderBy = sf.orderBy(); - if (orderBy instanceof NamedExpression) { - orderBy = qContainer.aliases().getOrDefault(orderBy, orderBy); - qContainer = qContainer - .addSort(new AttributeSort(((NamedExpression) orderBy).toAttribute(), direction, missing)); - } else if (orderBy.foldable() == false) { - // ignore constant - throw new PlanningException("does not know how to order by expression {}", orderBy); - } - } else { - // nope, use scripted sorting - qContainer = qContainer.addSort(new ScriptSort(sf.asScript(), direction, missing)); - } + // nope, use scripted sorting + qContainer = qContainer.addSort(new ScriptSort(sf.asScript(), direction, missing)); } // score else if (orderExpression instanceof Score) { diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java index 35872f3a6f6fa..a3d55a19ec15a 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java @@ -470,11 +470,46 @@ public void testGroupByOnInexact() { public void testGroupByOnNested() { assertEquals("1:38: Grouping isn't (yet) compatible with nested fields [dep.dep_id]", error("SELECT dep.dep_id FROM test GROUP BY dep.dep_id")); + assertEquals("1:8: Grouping isn't (yet) compatible with nested fields [dep.dep_id]", + error("SELECT dep.dep_id AS a FROM test GROUP BY a")); + assertEquals("1:8: Grouping isn't (yet) compatible with nested fields [dep.dep_id]", + error("SELECT dep.dep_id AS a FROM test GROUP BY 1")); + assertEquals("1:8: Grouping isn't (yet) compatible with nested fields [dep.dep_id, dep.start_date]", + error("SELECT dep.dep_id AS a, dep.start_date AS b FROM test GROUP BY 1, 2")); + assertEquals("1:8: Grouping isn't (yet) compatible with nested fields [dep.dep_id, dep.start_date]", + error("SELECT dep.dep_id AS a, dep.start_date AS b FROM test GROUP BY a, b")); } public void testHavingOnNested() { assertEquals("1:51: HAVING isn't (yet) compatible with nested fields [dep.start_date]", error("SELECT int FROM test GROUP BY int HAVING AVG(YEAR(dep.start_date)) > 1980")); + assertEquals("1:22: HAVING isn't (yet) compatible with nested fields [dep.start_date]", + error("SELECT int, AVG(YEAR(dep.start_date)) AS average FROM test GROUP BY int HAVING average > 1980")); + assertEquals("1:22: HAVING isn't (yet) compatible with nested fields [dep.start_date, dep.end_date]", + error("SELECT int, AVG(YEAR(dep.start_date)) AS a, MAX(MONTH(dep.end_date)) AS b " + + "FROM test GROUP BY int " + + "HAVING a > 1980 AND b < 10")); + } + + public void testWhereOnNested() { + assertEquals("1:33: WHERE isn't (yet) compatible with scalar functions on nested fields [dep.start_date]", + error("SELECT int FROM test WHERE YEAR(dep.start_date) + 10 > 0")); + assertEquals("1:13: WHERE isn't (yet) compatible with scalar functions on nested fields [dep.start_date]", + error("SELECT YEAR(dep.start_date) + 10 AS a FROM test WHERE int > 10 AND (int < 3 OR NOT (a > 5))")); + accept("SELECT int FROM test WHERE dep.start_date > '2020-01-30'::date AND (int > 10 OR dep.end_date IS NULL)"); + accept("SELECT int FROM test WHERE dep.start_date > '2020-01-30'::date AND (int > 10 OR dep.end_date IS NULL) " + + "OR NOT(dep.start_date >= '2020-01-01')"); + } + + public void testOrderByOnNested() { + assertEquals("1:36: ORDER BY isn't (yet) compatible with scalar functions on nested fields [dep.start_date]", + error("SELECT int FROM test ORDER BY YEAR(dep.start_date) + 10")); + assertEquals("1:13: ORDER BY isn't (yet) compatible with scalar functions on nested fields [dep.start_date]", + error("SELECT YEAR(dep.start_date) + 10 FROM test ORDER BY 1")); + assertEquals("1:13: ORDER BY isn't (yet) compatible with scalar functions on nested fields " + + "[dep.start_date, dep.end_date]", + error("SELECT YEAR(dep.start_date) + 10 AS a, MONTH(dep.end_date) - 10 as b FROM test ORDER BY 1, 2")); + accept("SELECT int FROM test ORDER BY dep.start_date, dep.end_date"); } public void testGroupByScalarFunctionWithAggOnTarget() { diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java index 53fce394ac6c2..bf1aeab38c432 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java @@ -1025,6 +1025,20 @@ public void testGroupByYearAndScalarsQueryTranslator() { "\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}")); } + public void testOrderByYear() { + PhysicalPlan p = optimizeAndPlan("SELECT YEAR(date) FROM test ORDER BY 1"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("YEAR(date)", eqe.output().get(0).qualifiedName()); + assertEquals(INTEGER, eqe.output().get(0).dataType()); + assertThat(eqe.queryContainer().toString().replaceAll("\\s+", ""), + endsWith("\"sort\":[{\"_script\":{\"script\":{\"source\":\"InternalSqlScriptUtils.nullSafeSortNumeric(" + + "InternalSqlScriptUtils.dateTimeChrono(InternalSqlScriptUtils.docValue(doc,params.v0)," + + "params.v1,params.v2))\",\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"Z\"," + + "\"v2\":\"YEAR\"}},\"type\":\"number\",\"order\":\"asc\"}}]}")); + } + public void testGroupByHistogramWithDate() { LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(CAST(date AS DATE), INTERVAL 2 MONTHS)"); assertTrue(p instanceof Aggregate);