elastic · costin · Feb 2, 2019 · Feb 1, 2019
diff --git a/docs/reference/sql/limitations.asciidoc b/docs/reference/sql/limitations.asciidoc
@@ -67,8 +67,18 @@ a field is an array (has multiple values) or not, so without reading all the dat
 === Sorting by aggregation
 
 When doing aggregations (`GROUP BY`) {es-sql} relies on {es}'s `composite` aggregation for its support for paginating results.
-But this type of aggregation does come with a limitation: sorting can only be applied on the key used for the aggregation's buckets. This
-means that queries like `SELECT * FROM test GROUP BY age ORDER BY COUNT(*)` are not possible.
+However, this type of aggregation does come with a limitation: sorting can only be applied on the key used for the aggregation's buckets.
+{es-sql} overcomes this limitation by doing client-side sorting; however, as a safety measure, it allows only up to *512* rows.
+
+It is recommended to use `LIMIT` for queries that use sorting by aggregation, essentially indicating the top N results that are desired:
+
+[source, sql]
+--------------------------------------------------
+SELECT * FROM test GROUP BY age ORDER BY COUNT(*) LIMIT 100;
+--------------------------------------------------
+
+It is possible to run the same queries without a `LIMIT`; however, in that case, if the maximum size (*512*) is exceeded, an exception will be
+returned as {es-sql} is unable to track (and sort) all the results returned.
 
 [float]
 === Using aggregation functions on top of scalar functions

diff --git a/...qa/single-node/src/test/java/org/elasticsearch/xpack/sql/qa/single_node/CliExplainIT.java b/...qa/single-node/src/test/java/org/elasticsearch/xpack/sql/qa/single_node/CliExplainIT.java
@@ -20,7 +20,7 @@ public void testExplainBasic() throws IOException {
         assertThat(readLine(), startsWith("----------"));
         assertThat(readLine(), startsWith("With[{}]"));
         assertThat(readLine(), startsWith("\\_Project[[?*]]"));
-        assertThat(readLine(), startsWith("  \\_UnresolvedRelation[[][index=test],null,Unknown index [test]]"));
+        assertThat(readLine(), startsWith("  \\_UnresolvedRelation[test]"));
         assertEquals("", readLine());
 
         assertThat(command("EXPLAIN " + (randomBoolean() ? "" : "(PLAN ANALYZED) ") + "SELECT * FROM test"), containsString("plan"));
@@ -64,22 +64,22 @@ public void testExplainWithWhere() throws IOException {
         assertThat(readLine(), startsWith("----------"));
         assertThat(readLine(), startsWith("With[{}]"));
         assertThat(readLine(), startsWith("\\_Project[[?*]]"));
-        assertThat(readLine(), startsWith("  \\_Filter[i = 2#"));
-        assertThat(readLine(), startsWith("    \\_UnresolvedRelation[[][index=test],null,Unknown index [test]]"));
+        assertThat(readLine(), startsWith("  \\_Filter[Equals[?i,2"));
+        assertThat(readLine(), startsWith("    \\_UnresolvedRelation[test]"));
         assertEquals("", readLine());
 
         assertThat(command("EXPLAIN " + (randomBoolean() ? "" : "(PLAN ANALYZED) ") + "SELECT * FROM test WHERE i = 2"),
                 containsString("plan"));
         assertThat(readLine(), startsWith("----------"));
         assertThat(readLine(), startsWith("Project[[i{f}#"));
-        assertThat(readLine(), startsWith("\\_Filter[i = 2#"));
+        assertThat(readLine(), startsWith("\\_Filter[Equals[i"));
         assertThat(readLine(), startsWith("  \\_EsRelation[test][i{f}#"));
         assertEquals("", readLine());
 
         assertThat(command("EXPLAIN (PLAN OPTIMIZED) SELECT * FROM test WHERE i = 2"), containsString("plan"));
         assertThat(readLine(), startsWith("----------"));
         assertThat(readLine(), startsWith("Project[[i{f}#"));
-        assertThat(readLine(), startsWith("\\_Filter[i = 2#"));
+        assertThat(readLine(), startsWith("\\_Filter[Equals[i"));
         assertThat(readLine(), startsWith("  \\_EsRelation[test][i{f}#"));
         assertEquals("", readLine());
 
@@ -124,20 +124,20 @@ public void testExplainWithCount() throws IOException {
         assertThat(command("EXPLAIN (PLAN PARSED) SELECT COUNT(*) FROM test"), containsString("plan"));
         assertThat(readLine(), startsWith("----------"));
         assertThat(readLine(), startsWith("With[{}]"));
-        assertThat(readLine(), startsWith("\\_Project[[?COUNT(*)]]"));
-        assertThat(readLine(), startsWith("  \\_UnresolvedRelation[[][index=test],null,Unknown index [test]]"));
+        assertThat(readLine(), startsWith("\\_Project[[?COUNT[?*]]]"));
+        assertThat(readLine(), startsWith("  \\_UnresolvedRelation[test]"));
         assertEquals("", readLine());
 
         assertThat(command("EXPLAIN " + (randomBoolean() ? "" : "(PLAN ANALYZED) ") + "SELECT COUNT(*) FROM test"),
                 containsString("plan"));
         assertThat(readLine(), startsWith("----------"));
-        assertThat(readLine(), startsWith("Aggregate[[],[COUNT(*)#"));
+        assertThat(readLine(), startsWith("Aggregate[[],[Count[*=1"));
         assertThat(readLine(), startsWith("\\_EsRelation[test][i{f}#"));
         assertEquals("", readLine());
 
         assertThat(command("EXPLAIN (PLAN OPTIMIZED) SELECT COUNT(*) FROM test"), containsString("plan"));
         assertThat(readLine(), startsWith("----------"));
-        assertThat(readLine(), startsWith("Aggregate[[],[COUNT(*)#"));
+        assertThat(readLine(), startsWith("Aggregate[[],[Count[*=1"));
         assertThat(readLine(), startsWith("\\_EsRelation[test][i{f}#"));
         assertEquals("", readLine());
 

diff --git a/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/cli/ErrorsTestCase.java b/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/cli/ErrorsTestCase.java
@@ -73,7 +73,7 @@ public void testSelectProjectScoreInAggContext() throws Exception {
     public void testSelectOrderByScoreInAggContext() throws Exception {
         index("test", body -> body.field("foo", 1));
         assertFoundOneProblem(command("SELECT foo, COUNT(*) FROM test GROUP BY foo ORDER BY SCORE()"));
-        assertEquals("line 1:54: Cannot order by non-grouped column [SCORE()], expected [foo]" + END, readLine());
+        assertEquals("line 1:54: Cannot order by non-grouped column [SCORE()], expected [foo] or an aggregate function" + END, readLine());
     }
 
     @Override

diff --git a/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/ErrorsTestCase.java b/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/ErrorsTestCase.java
@@ -81,7 +81,9 @@ public void testSelectOrderByScoreInAggContext() throws Exception {
         try (Connection c = esJdbc()) {
             SQLException e = expectThrows(SQLException.class, () ->
                 c.prepareStatement("SELECT foo, COUNT(*) FROM test GROUP BY foo ORDER BY SCORE()").executeQuery());
-            assertEquals("Found 1 problem(s)\nline 1:54: Cannot order by non-grouped column [SCORE()], expected [foo]", e.getMessage());
+            assertEquals(
+                    "Found 1 problem(s)\nline 1:54: Cannot order by non-grouped column [SCORE()], expected [foo] or an aggregate function",
+                    e.getMessage());
         }
     }
 

diff --git a/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/SqlSpecTestCase.java b/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/SqlSpecTestCase.java
@@ -38,6 +38,7 @@ public static List<Object[]> readScriptSpec() throws Exception {
         tests.addAll(readScriptSpec("/datetime.sql-spec", parser));
         tests.addAll(readScriptSpec("/math.sql-spec", parser));
         tests.addAll(readScriptSpec("/agg.sql-spec", parser));
+        tests.addAll(readScriptSpec("/agg-ordering.sql-spec", parser));
         tests.addAll(readScriptSpec("/arithmetic.sql-spec", parser));
         tests.addAll(readScriptSpec("/string-functions.sql-spec", parser));
         tests.addAll(readScriptSpec("/case-functions.sql-spec", parser));

diff --git a/x-pack/plugin/sql/qa/src/main/resources/agg-ordering.sql-spec b/x-pack/plugin/sql/qa/src/main/resources/agg-ordering.sql-spec
@@ -0,0 +1,87 @@
+//
+// Custom sorting/ordering on aggregates
+//
+
+countWithImplicitGroupBy
+SELECT MAX(salary) AS m FROM test_emp ORDER BY COUNT(*);
+
+countWithImplicitGroupByWithHaving
+SELECT MAX(salary) AS m FROM test_emp HAVING MIN(salary) > 1 ORDER BY COUNT(*);
+
+countAndMaxWithImplicitGroupBy
+SELECT MAX(salary) AS m FROM test_emp ORDER BY MAX(salary), COUNT(*);
+
+maxWithAliasWithImplicitGroupBy
+SELECT MAX(salary) AS m FROM test_emp ORDER BY m;
+
+maxWithAliasWithImplicitGroupByAndHaving
+SELECT MAX(salary) AS m FROM test_emp HAVING COUNT(*) > 1 ORDER BY m;
+
+multipleOrderWithImplicitGroupByWithHaving
+SELECT MAX(salary) AS m FROM test_emp HAVING MIN(salary) > 1 ORDER BY COUNT(*), m DESC;
+
+multipleOrderWithImplicitGroupByWithoutAlias
+SELECT MAX(salary) AS m FROM test_emp HAVING MIN(salary) > 1 ORDER BY COUNT(*), MIN(salary) DESC;
+
+multipleOrderWithImplicitGroupByOfOrdinals
+SELECT MAX(salary) AS max, MIN(salary) AS min FROM test_emp HAVING MIN(salary) > 1 ORDER BY 1, COUNT(*), 2 DESC;
+
+aggWithoutAlias
+SELECT MAX(salary) AS max FROM test_emp GROUP BY gender ORDER BY MAX(salary);
+
+aggWithAlias
+SELECT MAX(salary) AS m FROM test_emp GROUP BY gender ORDER BY m;
+
+multipleAggsThatGetRewrittenWithoutAlias
+SELECT MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY gender ORDER BY MAX(salary);
+
+multipleAggsThatGetRewrittenWithAliasDesc
+SELECT MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY gender ORDER BY 1 DESC;
+
+multipleAggsThatGetRewrittenWithAlias
+SELECT MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY gender ORDER BY max;
+
+aggNotSpecifiedInTheAggregate
+SELECT MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender ORDER BY MAX(salary);
+
+aggNotSpecifiedInTheAggregatePlusOrdinal
+SELECT MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender ORDER BY MAX(salary), 2 DESC;
+
+aggNotSpecifiedInTheAggregateWithHaving
+SELECT MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY MAX(salary);
+
+aggNotSpecifiedInTheAggregateWithHavingDesc
+SELECT MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY MAX(salary) DESC;
+
+aggNotSpecifiedInTheAggregateAndGroupWithHaving
+SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY MAX(salary), gender;
+
+groupAndAggNotSpecifiedInTheAggregateWithHaving
+SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender, MAX(salary);
+
+multipleAggsThatGetRewrittenWithAliasOnAMediumGroupBy
+SELECT languages, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY languages ORDER BY max;
+
+multipleAggsThatGetRewrittenWithAliasOnALargeGroupBy
+SELECT emp_no, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY emp_no ORDER BY max;
+
+multipleAggsThatGetRewrittenWithAliasOnAMediumGroupByWithHaving
+SELECT languages, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY languages HAVING min BETWEEN 1000 AND 99999 ORDER BY max;
+
+aggNotSpecifiedInTheAggregatemultipleAggsThatGetRewrittenWithAliasOnALargeGroupBy
+SELECT emp_no, MIN(salary) AS min FROM test_emp GROUP BY emp_no ORDER BY MAX(salary);
+
+aggNotSpecifiedWithHavingOnLargeGroupBy
+SELECT MAX(salary) AS max FROM test_emp GROUP BY emp_no HAVING AVG(salary) > 1000 ORDER BY MIN(salary);
+
+aggWithTieBreakerDescAsc
+SELECT emp_no, MIN(languages) AS min FROM test_emp GROUP BY emp_no ORDER BY MIN(languages) DESC NULLS FIRST, emp_no ASC;
+
+aggWithTieBreakerDescDesc
+SELECT emp_no, MIN(languages) AS min FROM test_emp GROUP BY emp_no ORDER BY MIN(languages) DESC NULLS FIRST, emp_no DESC;
+
+aggWithTieBreakerAscDesc
+SELECT emp_no, MIN(languages) AS min FROM test_emp GROUP BY emp_no ORDER BY MAX(languages) ASC NULLS FIRST, emp_no DESC;
+
+aggWithMixOfOrdinals
+SELECT gender AS g, MAX(salary) AS m FROM test_emp GROUP BY gender ORDER BY 2 DESC LIMIT 3;
diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java
@@ -52,6 +52,8 @@
 import org.elasticsearch.xpack.sql.type.DataTypes;
 import org.elasticsearch.xpack.sql.type.InvalidMappedField;
 import org.elasticsearch.xpack.sql.type.UnsupportedEsField;
+import org.elasticsearch.xpack.sql.util.CollectionUtils;
+import org.elasticsearch.xpack.sql.util.Holder;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -106,7 +108,8 @@ protected Iterable<RuleExecutor<LogicalPlan>.Batch> batches() {
                 new ResolveFunctions(),
                 new ResolveAliases(),
                 new ProjectedAggregations(),
-                new ResolveAggsInHaving()
+                new ResolveAggsInHaving(),
+                new ResolveAggsInOrderBy()
                 //new ImplicitCasting()
                 );
         Batch finish = new Batch("Finish Analysis",
@@ -926,62 +929,57 @@ protected LogicalPlan rule(Project p) {
     // Handle aggs in HAVING. To help folding any aggs not found in Aggregation
     // will be pushed down to the Aggregate and then projected. This also simplifies the Verifier's job.
     //
-    private class ResolveAggsInHaving extends AnalyzeRule<LogicalPlan> {
+    private class ResolveAggsInHaving extends AnalyzeRule<Filter> {
 
         @Override
         protected boolean skipResolved() {
             return false;
         }
 
         @Override
-        protected LogicalPlan rule(LogicalPlan plan) {
+        protected LogicalPlan rule(Filter f) {
             // HAVING = Filter followed by an Agg
-            if (plan instanceof Filter) {
-                Filter f = (Filter) plan;
-                if (f.child() instanceof Aggregate && f.child().resolved()) {
-                    Aggregate agg = (Aggregate) f.child();
+            if (f.child() instanceof Aggregate && f.child().resolved()) {
+                Aggregate agg = (Aggregate) f.child();
 
-                    Set<NamedExpression> missing = null;
-                    Expression condition = f.condition();
+                Set<NamedExpression> missing = null;
+                Expression condition = f.condition();
 
-                    // the condition might contain an agg (AVG(salary)) that could have been resolved
-                    // (salary cannot be pushed down to Aggregate since there's no grouping and thus the function wasn't resolved either)
+                // the condition might contain an agg (AVG(salary)) that could have been resolved
+                // (salary cannot be pushed down to Aggregate since there's no grouping and thus the function wasn't resolved either)
 
-                    // so try resolving the condition in one go through a 'dummy' aggregate
-                    if (!condition.resolved()) {
-                        // that's why try to resolve the condition
-                        Aggregate tryResolvingCondition = new Aggregate(agg.source(), agg.child(), agg.groupings(),
-                                combine(agg.aggregates(), new Alias(f.source(), ".having", condition)));
+                // so try resolving the condition in one go through a 'dummy' aggregate
+                if (!condition.resolved()) {
+                    // that's why try to resolve the condition
+                    Aggregate tryResolvingCondition = new Aggregate(agg.source(), agg.child(), agg.groupings(),
+                            combine(agg.aggregates(), new Alias(f.source(), ".having", condition)));
 
-                        tryResolvingCondition = (Aggregate) analyze(tryResolvingCondition, false);
+                    tryResolvingCondition = (Aggregate) analyze(tryResolvingCondition, false);
 
-                        // if it got resolved
-                        if (tryResolvingCondition.resolved()) {
-                            // replace the condition with the resolved one
-                            condition = ((Alias) tryResolvingCondition.aggregates()
-                                .get(tryResolvingCondition.aggregates().size() - 1)).child();
-                        } else {
-                            // else bail out
-                            return plan;
-                        }
+                    // if it got resolved
+                    if (tryResolvingCondition.resolved()) {
+                        // replace the condition with the resolved one
+                        condition = ((Alias) tryResolvingCondition.aggregates()
+                            .get(tryResolvingCondition.aggregates().size() - 1)).child();
+                    } else {
+                        // else bail out
+                        return f;
                     }
+                }
 
-                    missing = findMissingAggregate(agg, condition);
-
-                    if (!missing.isEmpty()) {
-                        Aggregate newAgg = new Aggregate(agg.source(), agg.child(), agg.groupings(),
-                                combine(agg.aggregates(), missing));
-                        Filter newFilter = new Filter(f.source(), newAgg, condition);
-                        // preserve old output
-                        return new Project(f.source(), newFilter, f.output());
-                    }
+                missing = findMissingAggregate(agg, condition);
 
-                    return new Filter(f.source(), f.child(), condition);
+                if (!missing.isEmpty()) {
+                    Aggregate newAgg = new Aggregate(agg.source(), agg.child(), agg.groupings(),
+                            combine(agg.aggregates(), missing));
+                    Filter newFilter = new Filter(f.source(), newAgg, condition);
+                    // preserve old output
+                    return new Project(f.source(), newFilter, f.output());
                 }
-                return plan;
-            }
 
-            return plan;
+                return new Filter(f.source(), f.child(), condition);
+            }
+            return f;
         }
 
         private Set<NamedExpression> findMissingAggregate(Aggregate target, Expression from) {
@@ -1001,6 +999,66 @@ private Set<NamedExpression> findMissingAggregate(Aggregate target, Expression f
         }
     }
 
+
+    //
+    // Handle aggs in ORDER BY. To help folding, any aggs used for ordering that are not found in the Aggregate
+    // are pushed down into the Aggregate and the initial output is then projected. This also simplifies the Verifier's job.
+    // Similar to HAVING, however using a different matching pattern since HAVING is always a Filter with an Agg,
+    // while an OrderBy can have multiple intermediate nodes (Filter, Project, etc...)
+    //
+    private static class ResolveAggsInOrderBy extends AnalyzeRule<OrderBy> {
+
+        @Override
+        protected boolean skipResolved() {
+            return false;
+        }
+
+        @Override
+        protected LogicalPlan rule(OrderBy ob) {
+            List<Order> orders = ob.order();
+
+            // 1. collect the aggregate functions used as ORDER BY keys, wrapped as named expressions
+            List<NamedExpression> aggs = new ArrayList<>();
+            for (Order order : orders) {
+                if (Functions.isAggregate(order.child())) {
+                    aggs.add(Expressions.wrapAsNamed(order.child()));
+                }
+            }
+            if (aggs.isEmpty()) {
+                return ob;
+            }
+
+            // 2. find the first Aggregate under the OrderBy and update it; the holder restricts the rewrite to the first one visited
+            final Holder<Boolean> found = new Holder<>(Boolean.FALSE);
+
+            LogicalPlan plan = ob.transformDown(a -> {
+                if (found.get() == Boolean.FALSE) {
+                    found.set(Boolean.TRUE);
+
+                    List<NamedExpression> missing = new ArrayList<>();
+
+                    for (NamedExpression orderedAgg : aggs) {
+                        if (Expressions.anyMatch(a.aggregates(), e -> Expressions.equalsAsAttribute(e, orderedAgg)) == false) {
+                            missing.add(orderedAgg);
+                        }
+                    }
+                    // rewrite the Aggregate only if some ordered aggs are missing; otherwise it already contains all of them
+                    if (missing.isEmpty() == false) {
+                        // keep the existing aggregates and append the missing ones
+                        return new Aggregate(a.source(), a.child(), a.groupings(), CollectionUtils.combine(a.aggregates(), missing));
+                    }
+                }
+                return a;
+            }, Aggregate.class);
+
+            // if the plan was updated (a different instance was returned), project the initial output of the OrderBy on top of it
+            if (plan != ob) {
+                return new Project(ob.source(), plan, ob.output());
+            }
+            return ob;
+        }
+    }
+
     private class PruneDuplicateFunctions extends AnalyzeRule<LogicalPlan> {
 
         @Override