Skip to content

Commit

Permalink
sql: prefer aggregate overloads with string inputs
Browse files Browse the repository at this point in the history
Prior to this commit, the type checking code was not able to choose between
different possible aggregate overloads if the arguments had type unknown.
This commit changes the logic to match Postgres, which always prefers
overloads with arguments of type string if available.

Note that this commit still doesn't completely match Postgres' behavior,
because it doesn't handle the case when there are no overloads available
with string inputs for the arguments with unknown type. If there are no
overloads with string arguments, Postgres chooses the overload with the
preferred type for the given category. For example, float8 is the preferred
type for the numeric category in Postgres. Since we don't support the concept
of preferred types within type categories, supporting this behavior will be
a more involved change. For now, this commit should cover most of our
supported aggregates.

Release justification: low risk, high benefit change to existing
functionality.
Release note (sql change): the type checking code now prefers aggregate
overloads with string inputs if there are multiple possible candidates
due to arguments of unknown type.
  • Loading branch information
rytaft committed Mar 30, 2020
1 parent 64c2fe7 commit 420da7a
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 32 deletions.
12 changes: 8 additions & 4 deletions pkg/sql/logictest/testdata/logic_test/typing
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,12 @@ SELECT NULLIF(0, NULL) + NULLIF(0, NULL)
0

# Regression test for #46196.
query error ambiguous call: max\(unknown\)
SELECT MAX(t0.c0) FROM (VALUES (NULL), (NULL)) t0(c0)
query T
SELECT max(t0.c0) FROM (VALUES (NULL), (NULL)) t0(c0)
----
NULL

query error ambiguous call: max\(unknown\)
SELECT MAX(NULL) FROM (VALUES (NULL), (NULL)) t0(c0)
query T
SELECT max(NULL) FROM (VALUES (NULL), (NULL)) t0(c0)
----
NULL
49 changes: 21 additions & 28 deletions pkg/sql/opt/optbuilder/testdata/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -2706,12 +2706,9 @@ scalar-group-by
build
SELECT corr(k, NULL) FROM kv
----
project
├── columns: corr:5
├── scan kv
│ └── columns: k:1!null v:2 w:3 s:4
└── projections
└── NULL [as=corr:5]
error (42725): ambiguous call: corr(int, unknown), candidates are:
corr(int, int) -> float
corr(int, float) -> float

build
SELECT corr('foo', v) FROM kv
Expand Down Expand Up @@ -3840,25 +3837,21 @@ project
└── projections
└── (max:4, unnest:1) [as="?column?":6]

# Regression test for #46196.
build
SELECT MAX(t0.c0) FROM (VALUES (NULL), (NULL)) t0(c0);
----
error (42725): ambiguous call: max(unknown), candidates are:
max(int) -> int
max(float) -> float
max(decimal) -> decimal
max(date) -> date
max(timestamp) -> timestamp
max(interval) -> interval
max(string) -> string
max(bytes) -> bytes
max(timestamptz) -> timestamptz
max(oid) -> oid
max(uuid) -> uuid
max(inet) -> inet
max(time) -> time
max(timetz) -> timetz
max(jsonb) -> jsonb
max(varbit) -> varbit
max(bool) -> bool
# Regression test for #46196. Don't eliminate the scalar group by, and
# default to type string.
build format=show-types
SELECT max(t0.c0) FROM (VALUES (NULL), (NULL)) t0(c0);
----
scalar-group-by
├── columns: max:3(string)
├── project
│ ├── columns: column2:2(string)
│ ├── values
│ │ ├── columns: column1:1(unknown)
│ │ ├── (NULL,) [type=tuple{unknown}]
│ │ └── (NULL,) [type=tuple{unknown}]
│ └── projections
│ └── column1:1::STRING [as=column2:2, type=string]
└── aggregations
└── max [as=max:3, type=string]
└── column2:2 [type=string]
35 changes: 35 additions & 0 deletions pkg/sql/sem/tree/type_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,41 @@ func (expr *FuncExpr) TypeCheck(ctx *SemaContext, desired *types.T) (TypedExpr,
"%s()", def.Name)
}

// If the function is an aggregate that does not accept null arguments and we
// have arguments of unknown type, see if we can assign type string instead.
// TODO(rytaft): If there are no overloads with string inputs, Postgres
// chooses the overload with preferred type for the given category. For
// example, float8 is the preferred type for the numeric category in Postgres.
// To match Postgres' behavior, we should add that logic here too.
if !def.NullableArgs && def.FunctionProperties.Class == AggregateClass {
for i := range typedSubExprs {
if typedSubExprs[i].ResolvedType().Family() == types.UnknownFamily {
var filtered []overloadImpl
for j := range fns {
if fns[j].params().GetAt(i).Equivalent(types.String) {
if filtered == nil {
filtered = make([]overloadImpl, 0, len(fns)-j)
}
filtered = append(filtered, fns[j])
}
}

// Only use the filtered list if it's not empty.
if filtered != nil {
fns = filtered

// Cast the expression to a string so the execution engine will find
// the correct overload.
e, err := NewTypedCastExpr(typedSubExprs[i], types.String)
if err != nil {
return nil, err
}
typedSubExprs[i] = e
}
}
}
}

// Return NULL if at least one overload is possible, no overload accepts
// NULL arguments, the function isn't a generator or aggregate builtin, and
// NULL is given as an argument.
Expand Down

0 comments on commit 420da7a

Please sign in to comment.