Skip to content

Commit

Permalink
sql: fix record-returning udfs when used as data source
Browse files Browse the repository at this point in the history
When record-returning UDFs (both implicit and `RECORD` return types) are
used as a data source in a query, the result should be treated as a row
with separate columns instead of a tuple, which is how UDF output is
normally treated. This PR closes this gap between CRDB and Postgres.

For example:

```
CREATE FUNCTION f() RETURNS RECORD AS
$$
  SELECT 1, 2, 3;
$$ LANGUAGE SQL

SELECT f()
    f
--------
 (1,2,3)

SELECT * FROM f() as foo(a int, b int, c int);
 a | b | c
---+---+---
 1 | 2 | 3
```

The behavior is the same for implicit record return types.

Epic: CRDB-19496
Fixes: cockroachdb#97059

Release note (bug fix): Fixes the behavior of UDFs to return its results
as a row instead of a tuple when UDFs are called in a query as a data
source. This is now compatible with postgres behavior.
  • Loading branch information
rharding6373 committed Apr 13, 2023
1 parent 773f7de commit 781366a
Show file tree
Hide file tree
Showing 19 changed files with 443 additions and 50 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/enums
Original file line number Diff line number Diff line change
Expand Up @@ -1768,7 +1768,7 @@ $$;
query TT
SELECT "🙏"('😊'), "🙏"(NULL:::"Emoji 😉")
----
NULL NULL
NULL mixed

statement ok
CREATE DATABASE "DB➕➕";
Expand Down
244 changes: 237 additions & 7 deletions pkg/sql/logictest/testdata/logic_test/udf_record
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,13 @@ SELECT f_one();
----
(1)

# TODO(97059): The following query should require a column definition list.
statement ok
statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_one();

# TODO(97059): The following query should produce a row, not a tuple.
query T
query I
SELECT * FROM f_one() AS foo (a INT);
----
(1)
1

statement ok
CREATE FUNCTION f_const() RETURNS RECORD AS
Expand All @@ -35,7 +33,7 @@ SELECT f_const();
(1,2.0,"welcome roacher","2021-07-12 17:02:10+00")

statement ok
CREATE FUNCTION f_arr() RETURNS RECORD AS
CREATE FUNCTION f_arr() RETURNS RECORD STABLE AS
$$
SELECT ARRAY[1, 2, 3];
$$ LANGUAGE SQL;
Expand Down Expand Up @@ -92,7 +90,7 @@ SELECT f_table();
(1,5)

statement ok
CREATE FUNCTION f_multitable() RETURNS RECORD AS
CREATE FUNCTION f_multitable() RETURNS RECORD STABLE AS
$$
SELECT t1.*, t2.* FROM t as t1 JOIN t as t2 on t1.a = t2.a ORDER BY t1.a LIMIT 1;
$$ LANGUAGE SQL;
Expand Down Expand Up @@ -130,3 +128,235 @@ query T
SELECT f_table();
----
(1,5)

subtest datasource

statement ok
CREATE FUNCTION f_tup() RETURNS RECORD AS
$$
SELECT ROW(1, 2, 3);
$$ LANGUAGE SQL;

query T
SELECT f_tup();
----
(1,2,3)

statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_tup();

query III colnames
SELECT * FROM f_tup() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE FUNCTION f_col() RETURNS RECORD AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query T
SELECT f_col();
----
(1,2,3)

query III colnames
SELECT * FROM f_col() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE TABLE t_imp (a INT PRIMARY KEY, b INT);
INSERT INTO t_imp VALUES (1, 10), (2, 4), (3, 32);

statement ok
CREATE FUNCTION f_imp() RETURNS t_imp AS
$$
SELECT * FROM t_imp ORDER BY a LIMIT 1;
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_imp();
----
a b
1 10

statement ok
CREATE TYPE udt AS ENUM ('a', 'b', 'c');

statement ok
CREATE FUNCTION f_udt() RETURNS udt AS
$$
SELECT 'a'::udt;
$$ LANGUAGE SQL;

query T
SELECT * FROM f_udt();
----
a

statement ok
CREATE FUNCTION f_udt_record() RETURNS RECORD AS
$$
SELECT 'a'::udt;
$$ LANGUAGE SQL;

query T
SELECT * FROM f_udt() AS foo(u udt);
----
a

query II rowsort
SELECT * FROM f_setof() AS foo(a INT, b INT);
----
1 5
2 6
3 7

statement ok
CREATE FUNCTION f_setof_imp() RETURNS SETOF t_imp STABLE AS
$$
SELECT * FROM t_imp;
$$ LANGUAGE SQL;

query II rowsort
SELECT * FROM f_setof_imp()
----
1 10
2 4
3 32

statement ok
CREATE FUNCTION f_strict() RETURNS RECORD STRICT AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query III
SELECT * FROM f_strict() AS foo(a INT, b INT, c INT);
----
1 2 3

statement ok
CREATE FUNCTION f_setof_strict() RETURNS SETOF RECORD STRICT STABLE AS
$$
SELECT * FROM t_imp;
$$ LANGUAGE SQL;

query II rowsort
SELECT * FROM f_setof_strict() AS foo(a INT, b INT);
----
1 10
2 4
3 32

statement ok
CREATE FUNCTION f_strict_arg(IN a INT, IN b INT) RETURNS RECORD STRICT STABLE AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_strict_arg(1,2) AS foo(a INT, b INT);
----
a b
1 2

query II colnames
SELECT * FROM f_strict_arg(NULL, 2) AS foo(a INT, b INT);
----
a b
NULL NULL

statement ok
CREATE FUNCTION f_strict_arg_setof(IN a INT, IN b INT) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b FROM generate_series(1,3);
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_strict_arg_setof(1,2) AS foo(a INT, b INT);
----
a b
1 2
1 2
1 2

# Strict SETOF UDF with NULL input returns 0 rows.
query II colnames
SELECT * FROM f_strict_arg_setof(NULL,2) AS foo(a INT, b INT);
----
a b

statement ok
CREATE TABLE n (a INT PRIMARY KEY, b INT);
INSERT INTO n VALUES (1, 5), (2, NULL);

query III colnames
WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg(narg.input, 2) AS foo(a INT, b INT);
----
input a b
NULL NULL NULL

query III colnames
WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg_SETOF(narg.input, 2) AS foo(a INT, b INT);
----
input a b

statement ok
CREATE FUNCTION f_arg(IN a INT8, IN b INT8) RETURNS RECORD AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

query II
SELECT * FROM f_arg(1,2) AS foo(a INT, b INT);
----
1 2

# Test ambiguous function signatures with records
subtest ambiguity

# setof
statement ok
CREATE FUNCTION f_amb_setof(a INT8, b INT8) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

statement ok
CREATE FUNCTION f_amb_setof(a INT8, b STRING) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

# TODO(harding): In postgres, calls to f_amb_setof should succeed and return 0 rows.
statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are
SELECT f_amb_setof(1, NULL);

statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are
SELECT * FROM f_amb_setof(1, NULL) as foo(a int, b int);

statement ok
CREATE FUNCTION f_amb(a INT, b INT) RETURNS RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

statement ok
CREATE FUNCTION f_amb(a INT, b STRING) RETURNS RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

# TODO(harding): In postgres, calls to f_amb should succeed and return NULL.
statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are
SELECT f_amb(1, NULL);

# TODO(harding): In postgres, calls to f_amb as a data source should succeed
# and return NULL NULL.
statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are
SELECT * FROM f_amb(1, NULL) as foo(a int, b int);
35 changes: 23 additions & 12 deletions pkg/sql/logictest/testdata/logic_test/udf_setof
Original file line number Diff line number Diff line change
Expand Up @@ -166,25 +166,36 @@ CREATE FUNCTION all_ab() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_tuple() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT (a, b) FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab_tuple()
----
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_record() RETURNS SETOF RECORD LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

query II rowsort
SELECT * FROM all_ab_tuple()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40
8 changes: 8 additions & 0 deletions pkg/sql/opt/exec/execbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,8 @@ func (b *Builder) buildExistsSubquery(
types.Bool,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
),
tree.DBoolFalse,
}, types.Bool), nil
Expand Down Expand Up @@ -779,6 +781,8 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
), nil
}

Expand Down Expand Up @@ -831,6 +835,8 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
), nil
}

Expand Down Expand Up @@ -913,6 +919,8 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
udf.Typ,
enableStepping,
udf.CalledOnNullInput,
udf.MultiColOutput,
udf.SetReturning,
), nil
}

Expand Down
9 changes: 8 additions & 1 deletion pkg/sql/opt/norm/inline_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,11 +412,15 @@ func (c *CustomFuncs) InlineConstVar(f memo.FiltersExpr) memo.FiltersExpr {
// 2. It has a single statement.
// 3. Its arguments are non-volatile expressions.
// 4. It is not a set-returning function.
// 5. It is not a record-returning function.
//
// UDFs with mutations (INSERT, UPDATE, UPSERT, DELETE) cannot be inlined, but
// we do not need an explicit check for this because immutable UDFs cannot
// contain mutations.
//
// We cannot inline record-returning functions, because subqueries can only
// return a single column.
//
// TODO(mgartner): We may be able to loosen (1) and (3). Subqueries are always
// evaluated just once, so by converting a UDF to a subquery we effectively make
// it and it's arguments non-volatile. So, if UDFs can be inlined in some other
Expand All @@ -428,8 +432,11 @@ func (c *CustomFuncs) InlineConstVar(f memo.FiltersExpr) memo.FiltersExpr {
// strict UDF that is not called when an argument is NULL. This presents a
// challenge because we cannot wrap a set-returning function in a CASE
// expression, like we do for strict, non-set-returning functions.
//
// TODO(harding): We could potentially loosen (5), since only record-returning
// UDFs used as data sources return multiple columns.
func (c *CustomFuncs) IsInlinableUDF(args memo.ScalarListExpr, udfp *memo.UDFPrivate) bool {
if udfp.Volatility == volatility.Volatile || len(udfp.Body) != 1 || udfp.SetReturning {
if udfp.Volatility == volatility.Volatile || len(udfp.Body) != 1 || udfp.SetReturning || udfp.MultiColOutput {
return false
}
for i := range args {
Expand Down
Loading

0 comments on commit 781366a

Please sign in to comment.