Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL: Fix ORDER BY on aggregates and GROUPed BY fields #51894

Merged
merged 10 commits into from
Feb 12, 2020
48 changes: 47 additions & 1 deletion x-pack/plugin/sql/qa/src/main/resources/agg-ordering.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,50 @@ g:s | gender:s | s3:i | SUM(salary):i | s5:i
M |M |2671054|2671054 |2671054
F |F |1666196|1666196 |1666196
null |null |487605 |487605 |487605
;
;

histogramDateTimeWithCountAndOrder_1
schema::h:ts|c:l
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC, c ASC;

h | c
------------------------+---------------
1965-01-01T00:00:00.000Z|1
1964-01-01T00:00:00.000Z|4
1963-01-01T00:00:00.000Z|7
1962-01-01T00:00:00.000Z|6
1961-01-01T00:00:00.000Z|8
1960-01-01T00:00:00.000Z|8
1959-01-01T00:00:00.000Z|9
1958-01-01T00:00:00.000Z|7
1957-01-01T00:00:00.000Z|4
1956-01-01T00:00:00.000Z|5
1955-01-01T00:00:00.000Z|4
1954-01-01T00:00:00.000Z|8
1953-01-01T00:00:00.000Z|11
1952-01-01T00:00:00.000Z|8
null |10
;

histogramDateTimeWithCountAndOrder_2
schema::h:ts|c:l
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY c DESC, h ASC;

h | c
------------------------+---------------
1953-01-01T00:00:00.000Z|11
null |10
1959-01-01T00:00:00.000Z|9
1952-01-01T00:00:00.000Z|8
1954-01-01T00:00:00.000Z|8
1960-01-01T00:00:00.000Z|8
1961-01-01T00:00:00.000Z|8
1958-01-01T00:00:00.000Z|7
1963-01-01T00:00:00.000Z|7
1962-01-01T00:00:00.000Z|6
1956-01-01T00:00:00.000Z|5
1955-01-01T00:00:00.000Z|4
1957-01-01T00:00:00.000Z|4
1964-01-01T00:00:00.000Z|4
1965-01-01T00:00:00.000Z|1
;
27 changes: 24 additions & 3 deletions x-pack/plugin/sql/qa/src/main/resources/agg-ordering.sql-spec
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ aggNotSpecifiedInTheAggregateAndGroupWithHavingWithLimitAndDirection
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY MAX(salary) ASC, c DESC LIMIT 5;

groupAndAggNotSpecifiedInTheAggregateWithHaving
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender, MAX(salary);
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender NULLS FIRST, MAX(salary);

multipleAggsThatGetRewrittenWithAliasOnAMediumGroupBy
SELECT languages, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY languages ORDER BY max;
Expand Down Expand Up @@ -136,5 +136,26 @@ SELECT gender AS g, first_name AS f, last_name AS l FROM test_emp GROUP BY f, ge
multipleGroupingsAndOrderingByGroups_8
SELECT gender AS g, first_name, last_name FROM test_emp GROUP BY g, last_name, first_name ORDER BY gender ASC, first_name DESC, last_name ASC;

multipleGroupingsAndOrderingByGroupsWithFunctions
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY gender, c DESC, first_name, last_name ASC;
multipleGroupingsAndOrderingByGroupsAndAggs_1
SELECT gender, MIN(salary) AS min, COUNT(*) AS c, MAX(salary) AS max FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender ASC NULLS FIRST, MAX(salary) DESC;

multipleGroupingsAndOrderingByGroupsAndAggs_2
SELECT gender, MIN(salary) AS min, COUNT(*) AS c, MAX(salary) AS max FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender DESC NULLS LAST, MAX(salary) ASC;

multipleGroupingsAndOrderingByGroupsWithFunctions_1
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY gender NULLS FIRST, c DESC, first_name, last_name ASC;

multipleGroupingsAndOrderingByGroupsWithFunctions_2
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY c DESC, gender DESC NULLS LAST, first_name, last_name ASC;

multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_1
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 1 NULLS FIRST, 2, 3;

multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_2
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 1 DESC NULLS LAST, 2, 3;

multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_3
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 2, 1 NULLS FIRST, 3;

multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_4
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 3 DESC, 1 NULLS FIRST, 2;
Original file line number Diff line number Diff line change
Expand Up @@ -594,36 +594,51 @@ static class AggSortingQueue extends PriorityQueue<Tuple<List<?>, Integer>> {
this.sortingColumns = sortingColumns;
}

// compare row based on the received attribute sort
// if a sort item is not in the list, it is assumed the sorting happened in ES
// and the results are left as is (by using the row ordering), otherwise it is sorted based on the given criteria.
//
// Take for example ORDER BY a, x, b, y
// a, b - are sorted in ES
// x, y - need to be sorted client-side
// sorting on x kicks in, only if the values for a are equal.

/**
* Compare row based on the received attribute sort
* <ul>
* <li>
* If a tuple in {@code sortingColumns} has a null comparator, it is assumed the sorting
* happened in ES and the results are left as is (by using the row ordering), otherwise it is
* sorted based on the given criteria.
* </li>
* <li>
* If no tuple exists in {@code sortingColumns} for an output column, it means this column
* is not included at all in the ORDER BY
* </li>
*</ul>
*
* Take for example ORDER BY a, x, b, y
* a, b - are sorted in ES
* x, y - need to be sorted client-side
* sorting on x kicks in only if the values for a are equal.
* sorting on y kicks in only if the values for a, x and b are all equal
*
*/
// thanks to @jpountz for the row ordering idea as a way to preserve ordering
@SuppressWarnings("unchecked")
@Override
protected boolean lessThan(Tuple<List<?>, Integer> l, Tuple<List<?>, Integer> r) {
for (Tuple<Integer, Comparator> tuple : sortingColumns) {
int i = tuple.v1().intValue();
int columnIdx = tuple.v1().intValue();
Comparator comparator = tuple.v2();

Object vl = l.v1().get(i);
Object vr = r.v1().get(i);
// Get the values for left and right rows at the current column index
Object vl = l.v1().get(columnIdx);
Object vr = r.v1().get(columnIdx);
if (comparator != null) {
int result = comparator.compare(vl, vr);
// if things are equals, move to the next comparator
// if things are not equal: return the comparison result,
// otherwise: move to the next comparator to solve the tie.
if (result != 0) {
return result > 0;
}
}
// no comparator means the existing order needs to be preserved
// no comparator means the rows are pre-ordered by ES for the column at
// the current index and the existing order needs to be preserved
else {
// check the values - if they are equal move to the next comparator
// otherwise return the row order
// check the values - if they are not equal return the row order
// otherwise: move to the next comparator to solve the tie.
if (Objects.equals(vl, vr) == false) {
return l.v2().compareTo(r.v2()) > 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
source.sort("_doc");
return;
}
for (Sort sortable : container.sort()) {
for (Sort sortable : container.sort().values()) {
SortBuilder<?> sortBuilder = null;

if (sortable instanceof AttributeSort) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import org.elasticsearch.xpack.sql.querydsl.container.GlobalCountRef;
import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef;
import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef.Property;
import org.elasticsearch.xpack.sql.querydsl.container.GroupingFunctionSort;
import org.elasticsearch.xpack.sql.querydsl.container.MetricAggRef;
import org.elasticsearch.xpack.sql.querydsl.container.PivotColumnRef;
import org.elasticsearch.xpack.sql.querydsl.container.QueryContainer;
Expand Down Expand Up @@ -682,37 +683,36 @@ protected PhysicalPlan rule(OrderExec plan) {

// TODO: might need to validate whether the target field or group actually exist
if (group != null && group != Aggs.IMPLICIT_GROUP_KEY) {
// check whether the lookup matches a group
if (group.id().equals(lookup)) {
qContainer = qContainer.updateGroup(group.with(direction));
}
// else it's a leafAgg
else {
qContainer = qContainer.updateGroup(group.with(direction));
}
qContainer = qContainer.updateGroup(group.with(direction));
}

// field
if (orderExpression instanceof FieldAttribute) {
qContainer = qContainer.addSort(lookup,
new AttributeSort((FieldAttribute) orderExpression, direction, missing));
}
// scalar functions typically require script ordering
else if (orderExpression instanceof ScalarFunction) {
ScalarFunction sf = (ScalarFunction) orderExpression;
// nope, use scripted sorting
qContainer = qContainer.addSort(lookup, new ScriptSort(sf.asScript(), direction, missing));
}
// histogram
else if (orderExpression instanceof Histogram) {
qContainer = qContainer.addSort(lookup, new GroupingFunctionSort(direction, missing));
}
// score
else if (orderExpression instanceof Score) {
qContainer = qContainer.addSort(lookup, new ScoreSort(direction, missing));
}
// agg function
else if (orderExpression instanceof AggregateFunction) {
qContainer = qContainer.addSort(lookup,
new AggregateSort((AggregateFunction) orderExpression, direction, missing));
}
// unknown
else {
// scalar functions typically require script ordering
if (orderExpression instanceof ScalarFunction) {
ScalarFunction sf = (ScalarFunction) orderExpression;
// nope, use scripted sorting
qContainer = qContainer.addSort(new ScriptSort(sf.asScript(), direction, missing));
}
// score
else if (orderExpression instanceof Score) {
qContainer = qContainer.addSort(new ScoreSort(direction, missing));
}
// field
else if (orderExpression instanceof FieldAttribute) {
qContainer = qContainer.addSort(new AttributeSort((FieldAttribute) orderExpression, direction, missing));
}
// agg function
else if (orderExpression instanceof AggregateFunction) {
qContainer = qContainer.addSort(new AggregateSort((AggregateFunction) orderExpression, direction, missing));
} else {
// unknown
throw new SqlIllegalArgumentException("unsupported sorting expression {}", orderExpression);
}
throw new SqlIllegalArgumentException("unsupported sorting expression {}", orderExpression);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.querydsl.container;

import java.util.Objects;

public class GroupingFunctionSort extends Sort {

public GroupingFunctionSort(Direction direction, Missing missing) {
super(direction, missing);
}

@Override
public int hashCode() {
return Objects.hash(direction(), missing());
}

@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}

if (obj == null || getClass() != obj.getClass()) {
return false;
}

GroupingFunctionSort other = (GroupingFunctionSort) obj;
return Objects.equals(direction(), other.direction())
&& Objects.equals(missing(), other.missing());
}
}
Loading