From 3f421bafb11af58281448eedc2fa0960b060b716 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Mon, 9 Aug 2021 04:29:33 -0400 Subject: [PATCH] SQL: Disallow queries with inner LIMIT that cannot be executed in ES (#75960) (#76222) * Disallow queries with inner limits that cannot be executed correctly * address review comments * introduce PushDownAndCombineOrderBy optimization * Revert "introduce PushDownAndCombineOrderBy optimization" 8d8d992c188948e040eb57e3fa743ee8ba804a58 * use replaceChild Co-authored-by: Lukas Wegmann --- .../server/src/main/resources/select.sql-spec | 4 -- .../xpack/sql/parser/LogicalPlanBuilder.java | 11 ++- .../xpack/sql/planner/Verifier.java | 30 ++++---- .../xpack/sql/parser/SqlParserTests.java | 5 ++ .../xpack/sql/planner/VerifierTests.java | 70 ++++++++++++++++--- 5 files changed, 95 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/sql/qa/server/src/main/resources/select.sql-spec b/x-pack/plugin/sql/qa/server/src/main/resources/select.sql-spec index 304945c0dacc0..0a6d0558c20ab 100644 --- a/x-pack/plugin/sql/qa/server/src/main/resources/select.sql-spec +++ b/x-pack/plugin/sql/qa/server/src/main/resources/select.sql-spec @@ -153,14 +153,10 @@ selectOrderByOrderByOrderByLimit SELECT * FROM (SELECT * FROM (SELECT * FROM test_emp ORDER BY emp_no DESC) ORDER BY emp_no ASC) ORDER BY emp_no DESC LIMIT 5; selectOrderByOrderByOrderByLimitLimit SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM test_emp ORDER BY emp_no DESC) ORDER BY emp_no ASC) ORDER BY emp_no DESC LIMIT 12) LIMIT 6; -selectOrderByLimitSameOrderBy -SELECT * FROM (SELECT * FROM (SELECT * FROM test_emp LIMIT 10) ORDER BY emp_no) ORDER BY emp_no LIMIT 5; selectGroupByOrderByLimit SELECT * FROM (SELECT max(salary) AS max, languages FROM test_emp GROUP BY languages) ORDER BY max DESC LIMIT 3; selectGroupByOrderByLimitNulls SELECT * FROM (SELECT max(salary) AS max, languages FROM test_emp GROUP BY languages) ORDER BY max DESC NULLS FIRST LIMIT 3; -selectGroupByLimitOrderByLimit -SELECT * FROM (SELECT max(salary) AS max, languages FROM test_emp WHERE languages IS NOT NULL GROUP BY languages LIMIT 5) ORDER BY max DESC LIMIT 3; selectGroupByOrderByOrderByLimit SELECT * FROM (SELECT max(salary) AS max, languages FROM test_emp GROUP BY languages ORDER BY max ASC) ORDER BY max DESC NULLS FIRST LIMIT 4; selectGroupByOrderByOrderByLimitNulls diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/LogicalPlanBuilder.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/LogicalPlanBuilder.java index 1d38709b03c4a..5ed7085b9dbea 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/LogicalPlanBuilder.java @@ -100,7 +100,16 @@ public LogicalPlan visitQueryNoWith(QueryNoWithContext ctx) { if (ctx.orderBy().isEmpty() == false) { List orders = ctx.orderBy(); OrderByContext endContext = orders.get(orders.size() - 1); - plan = new OrderBy(source(ctx.ORDER(), endContext), plan, visitList(ctx.orderBy(), Order.class)); + Source source = source(ctx.ORDER(), endContext); + List order = visitList(ctx.orderBy(), Order.class); + + if (plan instanceof Limit) { + // Limit from TOP clauses must be the parent of the OrderBy clause + Limit limit = (Limit) plan; + plan = limit.replaceChild(new OrderBy(source, limit.child(), order)); + } else { + plan = new OrderBy(source, plan, order); + } } LimitClauseContext limitClause = ctx.limitClause(); diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/Verifier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/Verifier.java index ed5b5d017c682..d541564a1ed23 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/Verifier.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/Verifier.java @@ -7,11 +7,14 @@ package org.elasticsearch.xpack.sql.planner; import org.elasticsearch.xpack.ql.common.Failure; -import org.elasticsearch.xpack.ql.expression.Order; import org.elasticsearch.xpack.ql.util.Holder; +import org.elasticsearch.xpack.sql.plan.physical.AggregateExec; +import org.elasticsearch.xpack.sql.plan.physical.FilterExec; import org.elasticsearch.xpack.sql.plan.physical.LimitExec; import org.elasticsearch.xpack.sql.plan.physical.OrderExec; import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.sql.plan.physical.PivotExec; +import org.elasticsearch.xpack.sql.plan.physical.UnaryExec; import org.elasticsearch.xpack.sql.plan.physical.Unexecutable; import org.elasticsearch.xpack.sql.plan.physical.UnplannedExec; @@ -59,20 +62,23 @@ static List verifyExecutingPlan(PhysicalPlan plan) { } private static void checkForNonCollapsableSubselects(PhysicalPlan plan, List failures) { - Holder hasLimit = new Holder<>(Boolean.FALSE); - Holder> orderBy = new Holder<>(); + Holder limit = new Holder<>(); + Holder limitedExec = new Holder<>(); + plan.forEachUp(p -> { - if (hasLimit.get() == false && p instanceof LimitExec) { - hasLimit.set(Boolean.TRUE); - return; - } - if (p instanceof OrderExec) { - if (hasLimit.get() && orderBy.get() != null && ((OrderExec) p).order().equals(orderBy.get()) == false) { - failures.add(fail(p, "Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT")); - } else { - orderBy.set(((OrderExec) p).order()); + if (limit.get() == null && p instanceof LimitExec) { + limit.set((LimitExec) p); + } else if (limit.get() != null && limitedExec.get() == null) { + if (p instanceof OrderExec || p instanceof FilterExec || p instanceof PivotExec || p instanceof AggregateExec) { + limitedExec.set((UnaryExec) p); } } }); + + if (limitedExec.get() != null) { + failures.add( + fail(limit.get(), "LIMIT or TOP cannot be used in a subquery if outer query contains GROUP BY, ORDER BY, PIVOT or WHERE") + ); + } } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/parser/SqlParserTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/parser/SqlParserTests.java index a95a84208e0c6..9719851fd1ec6 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/parser/SqlParserTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/parser/SqlParserTests.java @@ -142,9 +142,14 @@ public void testTop() { public void testUseBothTopAndLimitInvalid() { ParsingException e = expectThrows(ParsingException.class, () -> parseStatement("SELECT TOP 10 * FROM test LIMIT 20")); assertEquals("line 1:28: TOP and LIMIT are not allowed in the same query - use one or the other", e.getMessage()); + e = expectThrows(ParsingException.class, () -> parseStatement("SELECT TOP 30 a, count(*) cnt FROM test WHERE b = 20 GROUP BY a HAVING cnt > 10 LIMIT 40")); assertEquals("line 1:82: TOP and LIMIT are not allowed in the same query - use one or the other", e.getMessage()); + + e = expectThrows(ParsingException.class, + () -> parseStatement("SELECT TOP 30 * FROM test ORDER BY a LIMIT 40")); + assertEquals("line 1:39: TOP and LIMIT are not allowed in the same query - use one or the other", e.getMessage()); } public void testsSelectNonReservedKeywords() { diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/VerifierTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/VerifierTests.java index 728947adc13bb..8c04f57fb4fac 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/VerifierTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/VerifierTests.java @@ -14,6 +14,8 @@ import org.elasticsearch.xpack.sql.analysis.analyzer.Verifier; import org.elasticsearch.xpack.sql.expression.function.SqlFunctionRegistry; import org.elasticsearch.xpack.sql.parser.SqlParser; +import org.elasticsearch.xpack.sql.plan.physical.EsQueryExec; +import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.sql.stats.Metrics; import static org.elasticsearch.xpack.sql.SqlTestUtils.TEST_CFG; @@ -33,10 +35,14 @@ public class VerifierTests extends ESTestCase { ); private final Planner planner = new Planner(); + private PhysicalPlan verify(String sql) { + return planner.plan(analyzer.analyze(parser.createStatement(sql), true), true); + } + private String error(String sql) { PlanningException e = expectThrows( PlanningException.class, - () -> planner.plan(analyzer.analyze(parser.createStatement(sql), true), true) + () -> verify(sql) ); String message = e.getMessage(); assertTrue(message.startsWith("Found ")); @@ -45,21 +51,28 @@ private String error(String sql) { return message.substring(index + pattern.length()); } + private String innerLimitMsg(int line, int column) { + return line + + ":" + + column + + ": LIMIT or TOP cannot be used in a subquery if outer query contains GROUP BY, ORDER BY, PIVOT or WHERE"; + } + public void testSubselectWithOrderByOnTopOfOrderByAndLimit() { assertEquals( - "1:60: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT", + innerLimitMsg(1, 50), error("SELECT * FROM (SELECT * FROM test ORDER BY 1 ASC LIMIT 10) ORDER BY 2") ); assertEquals( - "1:72: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT", + innerLimitMsg(1, 50), error("SELECT * FROM (SELECT * FROM (SELECT * FROM test LIMIT 10) ORDER BY 1) ORDER BY 2") ); assertEquals( - "1:75: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT", + innerLimitMsg(1, 66), error("SELECT * FROM (SELECT * FROM (SELECT * FROM test ORDER BY 1 ASC) LIMIT 5) ORDER BY 1 DESC") ); assertEquals( - "1:152: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT", + innerLimitMsg(1, 142), error("SELECT * FROM (" + "SELECT * FROM (" + "SELECT * FROM (" + @@ -68,17 +81,21 @@ public void testSubselectWithOrderByOnTopOfOrderByAndLimit() { "ORDER BY int DESC NULLS LAST LIMIT 12) " + "ORDER BY int DESC NULLS FIRST") ); + assertEquals( + innerLimitMsg(1, 50), + error("SELECT * FROM (SELECT * FROM (SELECT * FROM test LIMIT 10) ORDER BY 1 LIMIT 20) ORDER BY 2") + ); } public void testSubselectWithOrderByOnTopOfGroupByOrderByAndLimit() { assertEquals( - "1:96: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT", + innerLimitMsg(1, 86), error( "SELECT * FROM (SELECT max(int) AS max, bool FROM test GROUP BY bool ORDER BY max ASC LIMIT 10) ORDER BY max DESC" ) ); assertEquals( - "1:112: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT", + innerLimitMsg(1, 102), error( "SELECT * FROM (" + "SELECT * FROM (" @@ -88,7 +105,7 @@ public void testSubselectWithOrderByOnTopOfGroupByOrderByAndLimit() { ) ); assertEquals( - "1:186: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT", + innerLimitMsg(1, 176), error("SELECT * FROM (" + "SELECT * FROM (" + "SELECT * FROM (" + @@ -98,4 +115,41 @@ public void testSubselectWithOrderByOnTopOfGroupByOrderByAndLimit() { "ORDER BY max DESC NULLS FIRST") ); } + + public void testInnerLimitWithWhere() { + assertEquals(innerLimitMsg(1, 35), + error("SELECT * FROM (SELECT * FROM test LIMIT 10) WHERE int = 1")); + assertEquals(innerLimitMsg(1, 50), + error("SELECT * FROM (SELECT * FROM (SELECT * FROM test LIMIT 10)) WHERE int = 1")); + assertEquals(innerLimitMsg(1, 51), + error("SELECT * FROM (SELECT * FROM (SELECT * FROM test) LIMIT 10) WHERE int = 1")); + } + + public void testInnerLimitWithGroupBy() { + assertEquals(innerLimitMsg(1, 37), + error("SELECT int FROM (SELECT * FROM test LIMIT 10) GROUP BY int")); + assertEquals(innerLimitMsg(1, 52), + error("SELECT int FROM (SELECT * FROM (SELECT * FROM test LIMIT 10)) GROUP BY int")); + assertEquals(innerLimitMsg(1, 53), + error("SELECT int FROM (SELECT * FROM (SELECT * FROM test) LIMIT 10) GROUP BY int")); + } + + public void testInnerLimitWithPivot() { + assertEquals(innerLimitMsg(1, 52), + error("SELECT * FROM (SELECT int, bool, keyword FROM test LIMIT 10) PIVOT (AVG(int) FOR bool IN (true, false))")); + } + + public void testTopWithOrderBySucceeds() { + PhysicalPlan plan = verify("SELECT TOP 5 * FROM test ORDER BY int"); + assertEquals(EsQueryExec.class, plan.getClass()); + } + + public void testInnerTop() { + assertEquals(innerLimitMsg(1, 23), + error("SELECT * FROM (SELECT TOP 10 * FROM test) WHERE int = 1")); + assertEquals(innerLimitMsg(1, 23), + error("SELECT * FROM (SELECT TOP 10 * FROM test) ORDER BY int")); + assertEquals(innerLimitMsg(1, 23), + error("SELECT * FROM (SELECT TOP 10 int, bool, keyword FROM test) PIVOT (AVG(int) FOR bool IN (true, false))")); + } }