Skip to content

Commit

Permalink
SQL: HAVING clause should accept only aggregates (#31872)
Browse files Browse the repository at this point in the history
Improve Verifier to allow HAVING clauses only on aggregates

Close #31726
  • Loading branch information
costin authored Jul 11, 2018
1 parent e955ffc commit 6136e49
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,11 @@ static Collection<Failure> verify(LogicalPlan plan) {
* Check validity of Aggregate/GroupBy.
* This rule is needed for multiple reasons:
* 1. a user might specify an invalid aggregate (SELECT foo GROUP BY bar)
* 2. the order/having might contain a non-grouped attribute. This is typically
* 2. the ORDER BY/HAVING might contain a non-grouped attribute. This is typically
* caught by the Analyzer however if wrapped in a function (ABS()) it gets resolved
* (because the expression gets resolved little by little without being pushed down,
* without the Analyzer modifying anything.
* 2a. HAVING also requires an Aggregate function
* 3. composite agg (used for GROUP BY) allows ordering only on the group keys
*/
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
Expand Down Expand Up @@ -244,7 +245,7 @@ private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailur
}

// make sure to compare attributes directly
if (Expressions.anyMatch(a.groupings(),
if (Expressions.anyMatch(a.groupings(),
g -> e.semanticEquals(e instanceof Attribute ? Expressions.attribute(g) : g))) {
return;
}
Expand Down Expand Up @@ -278,13 +279,14 @@ private static boolean checkGroupByHaving(LogicalPlan p, Set<Failure> localFailu

Map<Expression, Node<?>> missing = new LinkedHashMap<>();
Expression condition = f.condition();
condition.collectFirstChildren(c -> checkGroupMatch(c, condition, a.groupings(), missing, functions));
// variation of checkGroupMatch customized for HAVING, which requires just aggregations
condition.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, condition, missing, functions));

if (!missing.isEmpty()) {
String plural = missing.size() > 1 ? "s" : StringUtils.EMPTY;
localFailures.add(fail(condition, "Cannot filter by non-grouped column" + plural + " %s, expected %s",
Expressions.names(missing.keySet()),
Expressions.names(a.groupings())));
localFailures.add(
fail(condition, "Cannot filter HAVING on non-aggregate" + plural + " %s; consider using WHERE instead",
Expressions.names(missing.keySet())));
groupingFailures.add(a);
return false;
}
Expand All @@ -294,6 +296,57 @@ private static boolean checkGroupByHaving(LogicalPlan p, Set<Failure> localFailu
}


private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node<?> source,
Map<Expression, Node<?>> missing, Map<String, Function> functions) {

// resolve FunctionAttribute to backing functions
if (e instanceof FunctionAttribute) {
FunctionAttribute fa = (FunctionAttribute) e;
Function function = functions.get(fa.functionId());
// TODO: this should be handled by a different rule
if (function == null) {
return false;
}
e = function;
}

// scalar functions can be a binary tree
// first test the function against the grouping
// and if that fails, start unpacking hoping to find matches
if (e instanceof ScalarFunction) {
ScalarFunction sf = (ScalarFunction) e;

// unwrap function to find the base
for (Expression arg : sf.arguments()) {
arg.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, source, missing, functions));
}
return true;

} else if (e instanceof Score) {
// Score can't be used for having
missing.put(e, source);
return true;
}

// skip literals / foldable
if (e.foldable()) {
return true;
}
// skip aggs (allowed to refer to non-group columns)
if (Functions.isAggregate(e)) {
return true;
}

// left without leaves which have to match; that's a failure since everything should be based on an agg
if (e instanceof Attribute) {
missing.put(e, source);
return true;
}

return false;
}


// check whether plain columns specified in an agg are mentioned in the group-by
private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures,
Set<LogicalPlan> groupingFailures, Map<String, Function> functions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,14 @@ public void testGroupByOrderByScore() {
assertEquals("1:44: Cannot order by non-grouped column [SCORE()], expected [int]",
verify("SELECT int FROM test GROUP BY int ORDER BY SCORE()"));
}

public void testHavingOnColumn() {
assertEquals("1:42: Cannot filter HAVING on non-aggregate [int]; consider using WHERE instead",
verify("SELECT int FROM test GROUP BY int HAVING int > 2"));
}

public void testHavingOnScalar() {
assertEquals("1:42: Cannot filter HAVING on non-aggregate [int]; consider using WHERE instead",
verify("SELECT int FROM test GROUP BY int HAVING 2 < ABS(int)"));
}
}

0 comments on commit 6136e49

Please sign in to comment.