From 98471356c1c16e753dec68604f91defc89cca4d4 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Sat, 3 Apr 2021 18:46:16 +0300 Subject: [PATCH] SQL: Improve the optimization of null conditionals (#71192) Enhance the existing rules so that Coalesce(ex) -> ex NullIf(a, a) -> null NullIf(null, a) -> null NullIf(a, null) -> a --- .../predicate/conditional/NullIf.java | 22 +++++++- .../xpack/sql/optimizer/Optimizer.java | 55 ++++++++++++++----- .../xpack/sql/optimizer/OptimizerTests.java | 39 +++++++++---- .../sql/planner/QueryTranslatorTests.java | 33 +++++------ 4 files changed, 108 insertions(+), 41 deletions(-) diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/conditional/NullIf.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/conditional/NullIf.java index 924f67cb4ed75..bca6852c87a38 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/conditional/NullIf.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/conditional/NullIf.java @@ -26,8 +26,12 @@ */ public class NullIf extends ConditionalFunction { + private final Expression left, right; + public NullIf(Source source, Expression left, Expression right) { super(source, Arrays.asList(left, right)); + this.left = left; + this.right = right; } @Override @@ -40,9 +44,25 @@ public Expression replaceChildren(List newChildren) { return new NullIf(source(), newChildren.get(0), newChildren.get(1)); } + public Expression left() { + return left; + } + + public Expression right() { + return right; + } + + @Override + public boolean foldable() { + return left.semanticEquals(right) || super.foldable(); + } + @Override public Object fold() { - return NullIfProcessor.apply(children().get(0).fold(), children().get(1).fold()); + if (left.semanticEquals(right)) { + return null; + } + return NullIfProcessor.apply(left.fold(), right.fold()); } @Override diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java index 87f795fbfcb2e..49efdab71e669 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java @@ -82,8 +82,10 @@ import org.elasticsearch.xpack.sql.expression.predicate.conditional.ArbitraryConditionalFunction; import org.elasticsearch.xpack.sql.expression.predicate.conditional.Case; import org.elasticsearch.xpack.sql.expression.predicate.conditional.Coalesce; +import org.elasticsearch.xpack.sql.expression.predicate.conditional.ConditionalFunction; import org.elasticsearch.xpack.sql.expression.predicate.conditional.IfConditional; import org.elasticsearch.xpack.sql.expression.predicate.conditional.Iif; +import org.elasticsearch.xpack.sql.expression.predicate.conditional.NullIf; import org.elasticsearch.xpack.sql.expression.predicate.operator.arithmetic.Mul; import org.elasticsearch.xpack.sql.expression.predicate.operator.comparison.In; import org.elasticsearch.xpack.sql.plan.logical.LocalRelation; @@ -701,27 +703,52 @@ static class SimplifyConditional extends OptimizerExpressionRule { @Override protected Expression rule(Expression e) { - if (e instanceof ArbitraryConditionalFunction) { - ArbitraryConditionalFunction c = (ArbitraryConditionalFunction) e; + if (e instanceof ConditionalFunction) { + List children = e.children(); + // optimize nullIf + if (e instanceof NullIf) { + NullIf nullIf = (NullIf) e; + if (Expressions.isNull(nullIf.left()) || Expressions.isNull(nullIf.right())) { + return nullIf.left(); + } + } + if (e instanceof ArbitraryConditionalFunction) { + ArbitraryConditionalFunction c = (ArbitraryConditionalFunction) e; + + // there's no need for a conditional if all the children are the same (this includes the case of just one value) + if (c instanceof Coalesce && children.size() > 0) { + Expression firstChild = children.get(0); + boolean sameChild = true; + for (int i = 1; i < children.size(); i++) { + Expression child = children.get(i); + if (firstChild.semanticEquals(child) == false) { + sameChild = false; + break; + } + } + if (sameChild) { + return firstChild; + } + } - // exclude any nulls found - List newChildren = new ArrayList<>(); - for (Expression child : c.children()) { - if (Expressions.isNull(child) == false) { - newChildren.add(child); + // exclude any nulls found + List newChildren = new ArrayList<>(); + for (Expression child : children) { + if (Expressions.isNull(child) == false) { + newChildren.add(child); - // For Coalesce find the first non-null foldable child (if any) and break early - if (e instanceof Coalesce && child.foldable()) { - break; + // For Coalesce find the first non-null foldable child (if any) and break early + if (e instanceof Coalesce && child.foldable()) { + break; + } } } - } - if (newChildren.size() < c.children().size()) { - return c.replaceChildren(newChildren); + if (newChildren.size() < children.size()) { + return c.replaceChildren(newChildren); + } } } - return e; } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java index 615f49b7c27c6..1124ad6d8f18f 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java @@ -128,6 +128,7 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; import static org.elasticsearch.xpack.ql.TestUtils.equalsOf; +import static org.elasticsearch.xpack.ql.TestUtils.fieldAttribute; import static org.elasticsearch.xpack.ql.TestUtils.greaterThanOf; import static org.elasticsearch.xpack.ql.TestUtils.greaterThanOrEqualOf; import static org.elasticsearch.xpack.ql.TestUtils.lessThanOf; @@ -307,6 +308,16 @@ public void testMathFolding() { assertEquals(Math.E, foldFunction(new E(EMPTY))); } + public void testNullIfFolding() { + assertEquals(null, foldFunction(new NullIf(EMPTY, ONE, ONE))); + assertEquals(1, foldFunction(new NullIf(EMPTY, ONE, TWO))); + assertEquals(2, foldFunction(new NullIf(EMPTY, TWO, ONE))); + + FieldAttribute fa = fieldAttribute(); + // works even if the expressions are not foldable + assertEquals(null, foldFunction(new NullIf(EMPTY, fa, fa))); + } + private static Object foldFunction(Function f) { return ((Literal) new ConstantFolding().rule(f)).value(); } @@ -432,16 +443,19 @@ public void testNullFoldingDoesNotApplyOnArbitraryConditionals() throws Exceptio assertEquals(conditionalFunction, rule.rule(conditionalFunction)); } - public void testSimplifyCoalesceNulls() { - Expression e = new SimplifyConditional().rule(new Coalesce(EMPTY, asList(NULL, NULL))); - assertEquals(Coalesce.class, e.getClass()); - assertEquals(0, e.children().size()); - } - public void testSimplifyCoalesceRandomNulls() { Expression e = new SimplifyConditional().rule(new Coalesce(EMPTY, randomListOfNulls())); - assertEquals(Coalesce.class, e.getClass()); - assertEquals(0, e.children().size()); + assertEquals(NULL, e); + } + + public void testSimplifyCoalesceWithOnlyOneChild() { + Expression fa = fieldAttribute(); + assertSame(fa, new SimplifyConditional().rule(new Coalesce(EMPTY, singletonList(fa)))); + } + + public void testSimplifyCoalesceSameExpression() { + Expression fa = fieldAttribute(); + assertSame(fa, new SimplifyConditional().rule(new Coalesce(EMPTY, randomList(2, 10, () -> fa)))); } public void testSimplifyCoalesceRandomNullsWithValue() { @@ -470,8 +484,7 @@ public void testSimplifyCoalesceFirstLiteral() { public void testSimplifyIfNullNulls() { Expression e = new SimplifyConditional().rule(new IfNull(EMPTY, NULL, NULL)); - assertEquals(IfNull.class, e.getClass()); - assertEquals(0, e.children().size()); + assertEquals(NULL, e); } public void testSimplifyIfNullWithNullAndValue() { @@ -494,6 +507,12 @@ public void testFoldNullNotAppliedOnNullIf() { assertEquals(orig, f); } + public void testSimplifyNullIf() { + assertEquals(ONE, new SimplifyConditional().rule(new NullIf(EMPTY, ONE, NULL))); + assertEquals(NULL, new SimplifyConditional().rule(new NullIf(EMPTY, NULL, ONE))); + assertEquals(NULL, new SimplifyConditional().rule(new NullIf(EMPTY, NULL, NULL))); + } + public void testSimplifyGreatestNulls() { Expression e = new SimplifyConditional().rule(new Greatest(EMPTY, asList(NULL, NULL))); assertEquals(Greatest.class, e.getClass()); diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java index 91af05d50b62a..c0392efa8031f 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java @@ -1000,7 +1000,7 @@ public void testTranslateIsNotNullExpression_HavingClause_Painless() { public void testTranslateCoalesceExpression_WhereGroupByAndHaving_Painless() { - PhysicalPlan p = optimizeAndPlan("SELECT COALESCE(null, int) AS c, COALESCE(max(date), NULL) as m FROM test " + + PhysicalPlan p = optimizeAndPlan("SELECT COALESCE(int, 2) AS c, COALESCE(max(date), '2020-01-01'::date) as m FROM test " + "WHERE c > 10 GROUP BY c HAVING m > '2020-01-01'::date"); assertTrue(p instanceof EsQueryExec); EsQueryExec esQExec = (EsQueryExec) p; @@ -1011,19 +1011,20 @@ public void testTranslateCoalesceExpression_WhereGroupByAndHaving_Painless() { String aggName = aggBuilder.getSubAggregations().iterator().next().getName(); String havingName = aggBuilder.getPipelineAggregations().iterator().next().getName(); assertThat(aggBuilder.toString(), containsString("{\"terms\":{\"script\":{\"source\":\"" + - "InternalSqlScriptUtils.coalesce([InternalQlScriptUtils.docValue(doc,params.v0)])\",\"lang\":\"painless\"" + - ",\"params\":{\"v0\":\"int\"}},\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"}}}]}," + + "InternalSqlScriptUtils.coalesce([InternalQlScriptUtils.docValue(doc,params.v0),params.v1])\",\"lang\":\"painless\"" + + ",\"params\":{\"v0\":\"int\",\"v1\":2}},\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"}}}]}," + "\"aggregations\":{\"" + aggName + "\":{\"max\":{\"field\":\"date\"}},\"" + havingName + "\":" + "{\"bucket_selector\":{\"buckets_path\":{\"a0\":\"" + aggName + "\"},\"script\":{\"source\":\"" + "InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(InternalSqlScriptUtils.coalesce(" + - "[InternalSqlScriptUtils.asDateTime(params.a0)]),InternalSqlScriptUtils.asDateTime(params.v0)))\"," + - "\"lang\":\"painless\",\"params\":{\"v0\":\"2020-01-01T00:00:00.000Z\"}}")); + "[InternalSqlScriptUtils.asDateTime(params.a0),InternalSqlScriptUtils.asDateTime(params.v0)])," + + "InternalSqlScriptUtils.asDateTime(params.v1)))\"," + + "\"lang\":\"painless\",\"params\":{\"v0\":\"2020-01-01T00:00:00.000Z\",\"v1\":\"2020-01-01T00:00:00.000Z")); assertTrue(esQExec.queryContainer().query() instanceof ScriptQuery); ScriptQuery sq = (ScriptQuery) esQExec.queryContainer().query(); assertEquals("InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(" + - "InternalSqlScriptUtils.coalesce([InternalQlScriptUtils.docValue(doc,params.v0)]),params.v1))", + "InternalSqlScriptUtils.coalesce([InternalQlScriptUtils.docValue(doc,params.v0),params.v1]),params.v2))", sq.script().toString()); - assertEquals("[{v=int}, {v=10}]", sq.script().params().toString()); + assertEquals("[{v=int}, {v=2}, {v=10}]", sq.script().params().toString()); } public void testTranslateInExpression_WhereClause() { @@ -2562,7 +2563,7 @@ public void testSubqueryFilterOrderByAlias() throws Exception { "WHERE i IS NOT NULL " + "ORDER BY i"); } - + public void testSubqueryGroupByFilterAndOrderByByAlias() throws Exception { PhysicalPlan p = optimizeAndPlan("SELECT i FROM " + "( SELECT int AS i FROM test ) " + @@ -2620,32 +2621,32 @@ public void testSubqueryWithAliasOrderByAlias() throws Exception { "( SELECT int AS i FROM test ) AS s " + "ORDER BY s.i > 10"); } - + public void testMultiLevelSubqueriesOrderByByAlias() { optimizeAndPlan("SELECT i AS j FROM ( SELECT int AS i FROM test) ORDER BY j"); optimizeAndPlan("SELECT j AS k FROM (SELECT i AS j FROM ( SELECT int AS i FROM test)) ORDER BY k"); } - + public void testSubqueryGroupByOrderByAliasedExpression() { optimizeAndPlan("SELECT int_group AS g, min_date AS d " + - "FROM (" + + "FROM (" + " SELECT int % 2 AS int_group, MIN(date) AS min_date " + " FROM test WHERE date > '1970-01-01'::datetime " + - " GROUP BY int_group" + + " GROUP BY int_group" + ") " + "ORDER BY d DESC"); } public void testSubqueryGroupByOrderByAliasedAggFunction() { optimizeAndPlan("SELECT int_group AS g, min_date AS d " + - "FROM (" + + "FROM (" + " SELECT int % 2 AS int_group, MIN(date) AS min_date " + " FROM test WHERE date > '1970-01-01'::datetime " + - " GROUP BY int_group " + + " GROUP BY int_group " + ")" + "ORDER BY g DESC"); } - + public void testMultiLevelSubquerySelectStar() { optimizeAndPlan("SELECT * FROM (SELECT * FROM ( SELECT * FROM test ))"); optimizeAndPlan("SELECT * FROM (SELECT * FROM ( SELECT * FROM test ) b) c"); @@ -2659,7 +2660,7 @@ public void testMultiLevelSubqueryGroupBy() { public void testSubqueryGroupByFilterAndOrderByByRealiased() { optimizeAndPlan("SELECT g as h FROM (SELECT date AS f, int AS g FROM test) WHERE h IS NOT NULL GROUP BY h ORDER BY h ASC"); } - + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/69758") public void testFilterAfterGroupBy() { optimizeAndPlan("SELECT j AS k FROM (SELECT i AS j FROM ( SELECT int AS i FROM test) GROUP BY j) WHERE j < 5");