Skip to content

Commit

Permalink
sql: fix record-returning udfs when used as data source
Browse files Browse the repository at this point in the history
When record-returning UDFs (both implicit and `RECORD` return types) are
used as a data source in a query, the result should be treated as a row
with separate columns instead of a tuple, which is how UDF output is
normally treated. This PR closes this gap between CRDB and Postgres.

For example:

```
CREATE FUNCTION f() RETURNS RECORD AS
$$
  SELECT 1, 2, 3;
$$ LANGUAGE SQL

SELECT f()
    f
--------
 (1,2,3)

SELECT * FROM f() as foo(a int, b int, c int);
 a | b | c
---+---+---
 1 | 2 | 3
```

The behavior is the same for implicit record return types.

Epic: CRDB-19496
Fixes: cockroachdb#97059

Release note (sql change): UDFs with record-returning types now return a
row instead of a tuple.
  • Loading branch information
rharding6373 committed Mar 9, 2023
1 parent a95ffcd commit 22deff9
Show file tree
Hide file tree
Showing 11 changed files with 162 additions and 40 deletions.
63 changes: 58 additions & 5 deletions pkg/sql/logictest/testdata/logic_test/udf_record
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,13 @@ SELECT f_one();
----
(1)

# TODO(97059): The following query should require a column definition list.
statement ok
statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_one();

# TODO(97059): The following query should produce a row, not a tuple.
query T
query I
SELECT * FROM f_one() AS foo (a INT);
----
(1)
1

statement ok
CREATE FUNCTION f_const() RETURNS RECORD AS
Expand Down Expand Up @@ -130,3 +128,58 @@ query T
SELECT f_table();
----
(1,5)

# subtest datasource

statement ok
CREATE FUNCTION f_tup() RETURNS RECORD AS
$$
SELECT ROW(1, 2, 3);
$$ LANGUAGE SQL;

query T
SELECT f_tup();
----
(1,2,3)

statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_tup();

query III colnames
SELECT * FROM f_tup() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE FUNCTION f_col() RETURNS RECORD AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query T
SELECT f_col();
----
(1,2,3)

query III colnames
SELECT * FROM f_col() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE TABLE t_imp (a INT PRIMARY KEY, b INT);
INSERT INTO t_imp VALUES (1, 10), (2, 4), (3, 32);

statement ok
CREATE FUNCTION f_imp() RETURNS t_imp AS
$$
SELECT * FROM t_imp ORDER BY a LIMIT 1;
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_imp();
----
a b
1 10
35 changes: 23 additions & 12 deletions pkg/sql/logictest/testdata/logic_test/udf_setof
Original file line number Diff line number Diff line change
Expand Up @@ -166,25 +166,36 @@ CREATE FUNCTION all_ab() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_tuple() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT (a, b) FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab_tuple()
----
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_record() RETURNS SETOF RECORD LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

query II rowsort
SELECT * FROM all_ab_tuple()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40
4 changes: 4 additions & 0 deletions pkg/sql/opt/exec/execbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ func (b *Builder) buildExistsSubquery(
types.Bool,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
),
tree.DBoolFalse,
}, types.Bool), nil
Expand Down Expand Up @@ -753,6 +754,7 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
), nil
}

Expand Down Expand Up @@ -809,6 +811,7 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
), nil
}

Expand Down Expand Up @@ -891,6 +894,7 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
udf.Typ,
enableStepping,
udf.CalledOnNullInput,
udf.MultiColOutput,
), nil
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/ops/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,9 @@ define UDFPrivate {
# inputs are NULL. If false, the function will not be evaluated in the
# presence of NULL inputs, and will instead evaluate directly to NULL.
CalledOnNullInput bool

# MultiColOutput is true if the function may return multiple columns.
MultiColOutput bool
}

# KVOptions is a set of KVOptionItems that specify arbitrary keys and values
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/optbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ type Builder struct {
// are disabled and only statements whitelisted are allowed.
insideFuncDef bool

// If set, we are processing a data source.
insideDataSource bool

// If set, we are collecting view dependencies in schemaDeps. This can only
// happen inside view/function definitions.
//
Expand Down
65 changes: 47 additions & 18 deletions pkg/sql/opt/optbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ func (b *Builder) buildUDF(
// Build an expression for each statement in the function body.
rels := make(memo.RelListExpr, len(stmts))
isSetReturning := o.Class == tree.GeneratorClass
isMultiColOutput := false
for i := range stmts {
stmtScope := b.buildStmt(stmts[i].AST, nil /* desiredTypes */, bodyScope)
expr := stmtScope.expr
Expand All @@ -711,30 +712,51 @@ func (b *Builder) buildUDF(
// result columns of the last statement. If the result column is a tuple,
// then use its tuple contents for the return instead.
isSingleTupleResult := len(stmtScope.cols) == 1 && stmtScope.cols[0].typ.Family() == types.TupleFamily
if types.IsRecordType(f.ResolvedType()) {
if types.IsRecordType(rtyp) {
if isSingleTupleResult {
f.ResolvedType().InternalType.TupleContents = stmtScope.cols[0].typ.TupleContents()
rtyp.InternalType.TupleContents = stmtScope.cols[0].typ.TupleContents()
rtyp.InternalType.TupleLabels = stmtScope.cols[0].typ.TupleLabels()
} else {
tc := make([]*types.T, len(stmtScope.cols))
tl := make([]string, len(stmtScope.cols))
for i, col := range stmtScope.cols {
tc[i] = col.typ
tl[i] = col.name.MetadataName()
}
f.ResolvedType().InternalType.TupleContents = tc
rtyp.InternalType.TupleContents = tc
rtyp.InternalType.TupleLabels = tl
}
}
if b.insideDataSource && (types.IsRecordType(rtyp) || rtyp.UserDefined()) {
isMultiColOutput = true
if isSingleTupleResult {
// When used as a data source, we need to expand the tuple into
// individual columns.
stmtScope = bodyScope.push()
cols := physProps.Presentation
elems := make([]scopeColumn, len(rtyp.TupleContents()))
for i := range rtyp.TupleContents() {
e := b.factory.ConstructColumnAccess(b.factory.ConstructVariable(cols[0].ID), memo.TupleOrdinal(i))
col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp.TupleContents()[i], nil, e)
elems[i] = *col
}
expr = b.constructProject(expr, elems)
physProps = stmtScope.makePhysicalProps()
}
}

// If there are multiple output columns or the output type is a record and
// the output column is not a tuple, we must combine them into a tuple -
// only a single column can be returned from a UDF.
cols := physProps.Presentation
if len(cols) > 1 || (types.IsRecordType(f.ResolvedType()) && !isSingleTupleResult) {
if !isMultiColOutput && (len(cols) > 1 || (types.IsRecordType(rtyp) && !isSingleTupleResult)) {
elems := make(memo.ScalarListExpr, len(cols))
for i := range cols {
elems[i] = b.factory.ConstructVariable(cols[i].ID)
}
tup := b.factory.ConstructTuple(elems, f.ResolvedType())
tup := b.factory.ConstructTuple(elems, rtyp)
stmtScope = bodyScope.push()
col := b.synthesizeColumn(stmtScope, scopeColName(""), f.ResolvedType(), nil /* expr */, tup)
col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp, nil /* expr */, tup)
expr = b.constructProject(expr, []scopeColumn{*col})
physProps = stmtScope.makePhysicalProps()
}
Expand All @@ -747,17 +769,17 @@ func (b *Builder) buildUDF(
// column is already a tuple.
returnCol := physProps.Presentation[0].ID
returnColMeta := b.factory.Metadata().ColumnMeta(returnCol)
if !types.IsRecordType(f.ResolvedType()) && !returnColMeta.Type.Identical(f.ResolvedType()) {
if !cast.ValidCast(returnColMeta.Type, f.ResolvedType(), cast.ContextAssignment) {
if !types.IsRecordType(rtyp) && !isMultiColOutput && !returnColMeta.Type.Identical(rtyp) {
if !cast.ValidCast(returnColMeta.Type, rtyp, cast.ContextAssignment) {
panic(sqlerrors.NewInvalidAssignmentCastError(
returnColMeta.Type, f.ResolvedType(), returnColMeta.Alias))
returnColMeta.Type, rtyp, returnColMeta.Alias))
}
cast := b.factory.ConstructAssignmentCast(
b.factory.ConstructVariable(physProps.Presentation[0].ID),
f.ResolvedType(),
rtyp,
)
stmtScope = bodyScope.push()
col := b.synthesizeColumn(stmtScope, scopeColName(""), f.ResolvedType(), nil /* expr */, cast)
col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp, nil /* expr */, cast)
expr = b.constructProject(expr, []scopeColumn{*col})
physProps = stmtScope.makePhysicalProps()
}
Expand All @@ -772,12 +794,13 @@ func (b *Builder) buildUDF(
out = b.factory.ConstructUDF(
args,
&memo.UDFPrivate{
Name: def.Name,
Params: params,
Body: rels,
Typ: f.ResolvedType(),
SetReturning: isSetReturning,
Volatility: o.Volatility,
Name: def.Name,
Params: params,
Body: rels,
Typ: f.ResolvedType(),
SetReturning: isSetReturning,
Volatility: o.Volatility,
MultiColOutput: isMultiColOutput,
},
)

Expand Down Expand Up @@ -819,10 +842,16 @@ func (b *Builder) buildUDF(
)
}

if outCol == nil && isMultiColOutput {
f.ResolvedOverload().ReturnsRecordType = types.IsRecordType(rtyp)
return b.finishBuildGeneratorFunction(f, f.ResolvedOverload(), out, inScope, outScope, outCol)
}
// Should return a single tuple.
// Synthesize an output column for set-returning UDFs.
if isSetReturning && outCol == nil {
if outCol == nil && isSetReturning {
outCol = b.synthesizeColumn(outScope, scopeColName(""), f.ResolvedType(), nil /* expr */, out)
}

return b.finishBuildScalar(f, out, inScope, outScope, outCol)
}

Expand Down
6 changes: 4 additions & 2 deletions pkg/sql/opt/optbuilder/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@ import (
func (b *Builder) buildDataSource(
texpr tree.TableExpr, indexFlags *tree.IndexFlags, locking lockingSpec, inScope *scope,
) (outScope *scope) {
defer func(prevAtRoot bool) {
defer func(prevAtRoot bool, prevInsideDataSource bool) {
inScope.atRoot = prevAtRoot
}(inScope.atRoot)
b.insideDataSource = prevInsideDataSource
}(inScope.atRoot, b.insideDataSource)
inScope.atRoot = false
b.insideDataSource = true
// NB: The case statements are sorted lexicographically.
switch source := (texpr).(type) {
case *tree.AliasedTableExpr:
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/opt/optbuilder/srfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ func (b *Builder) buildZip(exprs tree.Exprs, inScope *scope) (outScope *scope) {
var outCol *scopeColumn
startCols := len(outScope.cols)

if def == nil || funcExpr.ResolvedOverload().Class != tree.GeneratorClass || b.shouldCreateDefaultColumn(texpr) {
isRecordReturningUDF := def != nil && funcExpr.ResolvedOverload().IsUDF && (texpr.ResolvedType().UserDefined() || types.IsRecordType(texpr.ResolvedType())) && b.insideDataSource

if def == nil || (funcExpr.ResolvedOverload().Class != tree.GeneratorClass && !isRecordReturningUDF) || (b.shouldCreateDefaultColumn(texpr) && !isRecordReturningUDF) {
if def != nil && len(funcExpr.ResolvedOverload().ReturnLabels) > 0 {
// Override the computed alias with the one defined in the ReturnLabels. This
// satisfies a Postgres quirk where some json functions use different labels
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/optbuilder/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ func (b *Builder) expandStarAndResolveType(
// example, the query `SELECT (x + 1) AS "x_incr" FROM t` has a projection with
// a synthesized column "x_incr".
//
// scope The scope is passed in so it can can be updated with the newly bound
// scope The scope is passed in so it can be updated with the newly bound
//
// variable.
//
Expand Down
12 changes: 11 additions & 1 deletion pkg/sql/routine.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,17 @@ func (g *routineGenerator) ResolvedType() *types.T {
// is cache-able (i.e., there are no arguments to the routine and stepping is
// disabled).
func (g *routineGenerator) Start(ctx context.Context, txn *kv.Txn) (err error) {
retTypes := []*types.T{g.expr.ResolvedType()}
rt := g.expr.ResolvedType()
var retTypes []*types.T
if g.expr.MultiColOutput {
// A routine with multiple output column has its types in a tuple.
retTypes = make([]*types.T, len(rt.TupleContents()))
for i, c := range rt.TupleContents() {
retTypes[i] = c
}
} else {
retTypes = []*types.T{g.expr.ResolvedType()}
}
g.rch.Init(ctx, retTypes, g.p.ExtendedEvalContext(), "routine" /* opName */)

// Configure stepping for volatile routines so that mutations made by the
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/sem/tree/routine.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ type RoutineExpr struct {
// Strict non-set-returning routines are not invoked when their arguments
// are NULL because optbuilder wraps them in a CASE expressions.
CalledOnNullInput bool

// MultiColOutput is true if the function may return multiple columns.
MultiColOutput bool
}

// NewTypedRoutineExpr returns a new RoutineExpr that is well-typed.
Expand All @@ -112,6 +115,7 @@ func NewTypedRoutineExpr(
typ *types.T,
enableStepping bool,
calledOnNullInput bool,
multiColOutput bool,
) *RoutineExpr {
return &RoutineExpr{
Args: args,
Expand All @@ -120,6 +124,7 @@ func NewTypedRoutineExpr(
EnableStepping: enableStepping,
Name: name,
CalledOnNullInput: calledOnNullInput,
MultiColOutput: multiColOutput,
}
}

Expand Down

0 comments on commit 22deff9

Please sign in to comment.