Skip to content

Commit

Permalink
opt: support grouping by aliases
Browse files Browse the repository at this point in the history
There is some baggage left over from SQL92 which allowed grouping by
select targets by their alias. We implement the same rules used by
postgres, as explained in the `buildGroupingColumns` comment.

Fixes #28059.

Release note (sql change): It is now supported to specify selection
target aliases as GROUP BY columns. Note that the FROM columns take
precedence over the aliases, which are only used if there is no column
with that name in the current scope.
  • Loading branch information
RaduBerinde committed Nov 23, 2019
1 parent 6d4fd89 commit 566be28
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 20 deletions.
109 changes: 90 additions & 19 deletions pkg/sql/opt/optbuilder/groupby.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,14 +304,14 @@ func (b *Builder) constructGroupBy(
// buildGroupingColumns builds the grouping columns and adds them to the
// groupby scopes that will be used to build the aggregation expression.
// Returns the slice of grouping columns.
func (b *Builder) buildGroupingColumns(sel *tree.SelectClause, fromScope *scope) {
func (b *Builder) buildGroupingColumns(sel *tree.SelectClause, projectionsScope, fromScope *scope) {
if fromScope.groupby == nil {
fromScope.initGrouping()
}
g := fromScope.groupby

// The "from" columns are visible to any grouping expressions.
b.buildGroupingList(sel.GroupBy, sel.Exprs, fromScope)
b.buildGroupingList(sel.GroupBy, sel.Exprs, projectionsScope, fromScope)

// Copy the grouping columns to the aggOutScope.
g.aggOutScope.appendColumns(g.groupingCols())
Expand Down Expand Up @@ -458,7 +458,7 @@ func (b *Builder) buildHaving(having tree.TypedExpr, fromScope *scope) opt.Scala
// indicates that the grouping is on the second select expression, k.
// fromScope The scope for the input to the aggregation (the FROM clause).
func (b *Builder) buildGroupingList(
groupBy tree.GroupBy, selects tree.SelectExprs, fromScope *scope,
groupBy tree.GroupBy, selects tree.SelectExprs, projectionsScope *scope, fromScope *scope,
) {
g := fromScope.groupby
g.groupStrs = make(groupByStrSet, len(groupBy))
Expand All @@ -476,7 +476,7 @@ func (b *Builder) buildGroupingList(
// a grouping error until the grouping columns are fully built.
g.buildingGroupingCols = true
for _, e := range groupBy {
b.buildGrouping(e, selects, fromScope, g.aggInScope)
b.buildGrouping(e, selects, projectionsScope, fromScope, g.aggInScope)
}
g.buildingGroupingCols = false
}
Expand All @@ -486,26 +486,97 @@ func (b *Builder) buildGroupingList(
// groupStrs and to the aggInScope.
//
//
// groupBy The given GROUP BY expression.
// selects The select expressions are needed in case the GROUP BY expression
// is an index into to the select list.
// fromScope The scope for the input to the aggregation (the FROM clause).
// aggInScope The scope that will contain the grouping expressions as well as
// the aggregate function arguments.
// groupBy The given GROUP BY expression.
// selects The select expressions are needed in case the GROUP BY
// expression is an index into to the select list.
// projectionsScope The scope that contains the columns for the SELECT targets
// (used when GROUP BY refers to a target by alias).
// fromScope The scope for the input to the aggregation (the FROM
// clause).
// aggInScope The scope that will contain the grouping expressions as well
// as the aggregate function arguments.
func (b *Builder) buildGrouping(
groupBy tree.Expr, selects tree.SelectExprs, fromScope, aggInScope *scope,
groupBy tree.Expr, selects tree.SelectExprs, projectionsScope, fromScope, aggInScope *scope,
) {
// Unwrap parenthesized expressions like "((a))" to "a".
groupBy = tree.StripParens(groupBy)

// Check whether the GROUP BY clause refers to a column in the SELECT list
// by index, e.g. `SELECT a, SUM(b) FROM y GROUP BY 1`.
col := colIndex(len(selects), groupBy, "GROUP BY")
alias := ""
if col != -1 {
groupBy = selects[col].Expr
alias = string(selects[col].As)
}

// Comment below pasted from PostgreSQL (findTargetListEntrySQL92 in
// src/backend/parser/parse_clause.c).
//
// Handle two special cases as mandated by the SQL92 spec:
//
// 1. Bare ColumnName (no qualifier or subscripts)
// For a bare identifier, we search for a matching column name
// in the existing target list. Multiple matches are an error
// unless they refer to identical values; for example,
// we allow SELECT a, a FROM table ORDER BY a
// but not SELECT a AS b, b FROM table ORDER BY b
// If no match is found, we fall through and treat the identifier
// as an expression.
// For GROUP BY, it is incorrect to match the grouping item against
// targetlist entries: according to SQL92, an identifier in GROUP BY
// is a reference to a column name exposed by FROM, not to a target
// list column. However, many implementations (including pre-7.0
// PostgreSQL) accept this anyway. So for GROUP BY, we look first
// to see if the identifier matches any FROM column name, and only
// try for a targetlist name if it doesn't. This ensures that we
// adhere to the spec in the case where the name could be both.
// DISTINCT ON isn't in the standard, so we can do what we like there;
// we choose to make it work like ORDER BY, on the rather flimsy
// grounds that ordinary DISTINCT works on targetlist entries.
//
// 2. IntegerConstant
// This means to use the n'th item in the existing target list.
// Note that it would make no sense to order/group/distinct by an
// actual constant, so this does not create a conflict with SQL99.
// GROUP BY column-number is not allowed by SQL92, but since
// the standard has no other behavior defined for this syntax,
// we may as well accept this common extension.

// This function sets groupBy and alias in these special cases.
func() {
// Check whether the GROUP BY clause refers to a column in the SELECT list
// by index, e.g. `SELECT a, SUM(b) FROM y GROUP BY 1` (case 2 above).
if col := colIndex(len(selects), groupBy, "GROUP BY"); col != -1 {
groupBy, alias = selects[col].Expr, string(selects[col].As)
return
}

if name, ok := groupBy.(*tree.UnresolvedName); ok {
if name.NumParts != 1 || name.Star {
return
}
// Case 1 above.
targetName := name.Parts[0]

// We must prefer a match against a FROM-clause column (but ignore upper
// scopes); in this case we let the general case below handle the reference.
for i := range fromScope.cols {
if string(fromScope.cols[i].name) == targetName {
return
}
}
// See if it matches exactly one of the target lists.
var match *scopeColumn
for i := range projectionsScope.cols {
if col := &projectionsScope.cols[i]; string(col.name) == targetName {
if match != nil {
// Multiple matches are only allowed if they refer to identical
// expressions.
if match.getExprStr() != col.getExprStr() {
panic(pgerror.Newf(pgcode.AmbiguousColumn, "GROUP BY %q is ambiguous", targetName))
}
}
match = col
}
}
if match != nil {
groupBy, alias = match.expr, targetName
}
}
}()

// We need to save and restore the previous value of the field in semaCtx
// in case we are recursively called within a subquery context.
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/optbuilder/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ func (b *Builder) buildSelectClause(
// Grouping columns must be built before building the projection list so
// we can check that any column references that appear in the SELECT list
// outside of aggregate functions are present in the grouping list.
b.buildGroupingColumns(sel, fromScope)
b.buildGroupingColumns(sel, projectionsScope, fromScope)
having = b.buildHaving(havingExpr, fromScope)
}

Expand Down
141 changes: 141 additions & 0 deletions pkg/sql/opt/optbuilder/testdata/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -3727,3 +3727,144 @@ project
└── eq [type=bool]
├── variable: k [type=int]
└── variable: a [type=int]

# Tests with aliases (see #28059).
build
SELECT x + 1 AS z FROM abxy GROUP BY z
----
group-by
├── columns: z:5(int)
├── grouping columns: z:5(int)
└── project
├── columns: z:5(int)
├── scan abxy
│ └── columns: a:1(int!null) b:2(int!null) x:3(int) y:4(int)
└── projections
└── plus [type=int]
├── variable: x [type=int]
└── const: 1 [type=int]

# The FROM column has precedence, we should be grouping by abxy.x, not by x%10.
build
SELECT (x % 10) AS x FROM abxy GROUP BY x
----
project
├── columns: x:5(int)
├── group-by
│ ├── columns: abxy.x:3(int)
│ ├── grouping columns: abxy.x:3(int)
│ └── project
│ ├── columns: abxy.x:3(int)
│ └── scan abxy
│ └── columns: a:1(int!null) b:2(int!null) abxy.x:3(int) y:4(int)
└── projections
└── mod [type=int]
├── variable: abxy.x [type=int]
└── const: 10 [type=int]

# But aliases have precedence over columns from higher scopes. Here we are
# grouping by v, not by the outer x.
build
SELECT x, (SELECT v AS x FROM kv GROUP BY x) FROM abxy
----
project
├── columns: x:3(int) x:9(int)
├── scan abxy
│ └── columns: a:1(int!null) b:2(int!null) abxy.x:3(int) y:4(int)
└── projections
└── subquery [type=int]
└── max1-row
├── columns: v:6(int)
└── group-by
├── columns: v:6(int)
├── grouping columns: v:6(int)
└── project
├── columns: v:6(int)
└── scan kv
└── columns: k:5(int!null) v:6(int) w:7(int) s:8(string)

build
SELECT sum(x) AS u FROM abxy GROUP BY u
----
error (42803): sum(): aggregate functions are not allowed in GROUP BY

# Implicit aliases should work too.
build
SELECT x + 1 FROM abxy GROUP BY "?column?"
----
group-by
├── columns: "?column?":5(int)
├── grouping columns: "?column?":5(int)
└── project
├── columns: "?column?":5(int)
├── scan abxy
│ └── columns: a:1(int!null) b:2(int!null) x:3(int) y:4(int)
└── projections
└── plus [type=int]
├── variable: x [type=int]
└── const: 1 [type=int]

build
SELECT sum(x) FROM abxy GROUP BY sum
----
error (42803): sum(): aggregate functions are not allowed in GROUP BY

# Ambiguous aliases should error out.
build
SELECT (x + 1) AS u, (y + 1) AS u FROM abxy GROUP BY u
----
error (42702): GROUP BY "u" is ambiguous

# In this case we would have had an outer column if it wasn't for the aliases;
# this should error out just the same.
build
SELECT x, (SELECT v AS x, w AS x FROM kv GROUP BY x) FROM abxy
----
error (42702): GROUP BY "x" is ambiguous

# Duplicate expressions with the same alias are not ambiguous.
build
SELECT (x + 1) AS u, (x + 1) AS u FROM abxy GROUP BY u
----
group-by
├── columns: u:5(int) u:5(int)
├── grouping columns: u:5(int)
└── project
├── columns: u:5(int)
├── scan abxy
│ └── columns: a:1(int!null) b:2(int!null) x:3(int) y:4(int)
└── projections
└── plus [type=int]
├── variable: x [type=int]
└── const: 1 [type=int]

build
SELECT (x + 1) AS u, (x + 1) AS u, (y + 1) AS u FROM abxy GROUP BY u
----
error (42702): GROUP BY "u" is ambiguous

# In this case, the FROM column has precedence.
build
SELECT sum(x + 1) AS x, sum(y + 1) AS x FROM abxy GROUP BY x
----
project
├── columns: x:6(decimal) x:8(decimal)
└── group-by
├── columns: x:3(int) sum:6(decimal) sum:8(decimal)
├── grouping columns: x:3(int)
├── project
│ ├── columns: column5:5(int) column7:7(int) x:3(int)
│ ├── scan abxy
│ │ └── columns: a:1(int!null) b:2(int!null) x:3(int) y:4(int)
│ └── projections
│ ├── plus [type=int]
│ │ ├── variable: x [type=int]
│ │ └── const: 1 [type=int]
│ └── plus [type=int]
│ ├── variable: y [type=int]
│ └── const: 1 [type=int]
└── aggregations
├── sum [type=decimal]
│ └── variable: column5 [type=int]
└── sum [type=decimal]
└── variable: column7 [type=int]

0 comments on commit 566be28

Please sign in to comment.