Skip to content

Commit

Permalink
Merge #98162
Browse files Browse the repository at this point in the history
98162: sql: fix record-returning udfs when used as data source r=rharding6373 a=rharding6373

When record-returning UDFs (both implicit and `RECORD` return types) are used as a data source in a query, the result should be treated as a row with separate columns instead of a tuple, which is how UDF output is normally treated. This PR closes this gap between CRDB and Postgres.

For example:

```
CREATE FUNCTION f() RETURNS RECORD AS
$$
  SELECT 1, 2, 3;
$$ LANGUAGE SQL

SELECT f()
    f
--------
 (1,2,3)

SELECT * FROM f() as foo(a int, b int, c int);
 a | b | c
---+---+---
 1 | 2 | 3
```

The behavior is the same for implicit record return types.

Epic: CRDB-19496
Fixes: #97059

Release note: None

Co-authored-by: rharding6373 <[email protected]>
  • Loading branch information
craig[bot] and rharding6373 committed Apr 24, 2023
2 parents 71c3539 + 4539072 commit a2f31ca
Show file tree
Hide file tree
Showing 21 changed files with 535 additions and 77 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/enums
Original file line number Diff line number Diff line change
Expand Up @@ -1768,7 +1768,7 @@ $$;
query TT
SELECT "🙏"('😊'), "🙏"(NULL:::"Emoji 😉")
----
NULL NULL
NULL mixed

statement ok
CREATE DATABASE "DB➕➕";
Expand Down
254 changes: 247 additions & 7 deletions pkg/sql/logictest/testdata/logic_test/udf_record
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,13 @@ SELECT f_one();
----
(1)

# TODO(#97059): The following query should require a column definition list.
statement ok
statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_one();

# TODO(#97059): The following query should produce a row, not a tuple.
query T
query I
SELECT * FROM f_one() AS foo (a INT);
----
(1)
1

statement ok
CREATE FUNCTION f_const() RETURNS RECORD AS
Expand All @@ -35,7 +33,7 @@ SELECT f_const();
(1,2.0,"welcome roacher","2021-07-12 17:02:10+00")

statement ok
CREATE FUNCTION f_arr() RETURNS RECORD AS
CREATE FUNCTION f_arr() RETURNS RECORD STABLE AS
$$
SELECT ARRAY[1, 2, 3];
$$ LANGUAGE SQL;
Expand Down Expand Up @@ -92,7 +90,7 @@ SELECT f_table();
(1,5)

statement ok
CREATE FUNCTION f_multitable() RETURNS RECORD AS
CREATE FUNCTION f_multitable() RETURNS RECORD STABLE AS
$$
SELECT t1.*, t2.* FROM t as t1 JOIN t as t2 on t1.a = t2.a ORDER BY t1.a LIMIT 1;
$$ LANGUAGE SQL;
Expand Down Expand Up @@ -130,3 +128,245 @@ query T
SELECT f_table();
----
(1,5)

subtest datasource

statement ok
CREATE FUNCTION f_tup() RETURNS RECORD AS
$$
SELECT ROW(1, 2, 3);
$$ LANGUAGE SQL;

query T
SELECT f_tup();
----
(1,2,3)

statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_tup();

query III colnames
SELECT * FROM f_tup() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE FUNCTION f_col() RETURNS RECORD AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query T
SELECT f_col();
----
(1,2,3)

query III colnames
SELECT * FROM f_col() as foo(a int, b int, c int);
----
a b c
1 2 3

query T
SELECT * FROM (VALUES (f_col())) as foo;
----
(1,2,3)

statement ok
CREATE TABLE t_imp (a INT PRIMARY KEY, b INT);
INSERT INTO t_imp VALUES (1, 10), (2, 4), (3, 32);

statement ok
CREATE FUNCTION f_imp() RETURNS t_imp AS
$$
SELECT * FROM t_imp ORDER BY a LIMIT 1;
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_imp();
----
a b
1 10

statement ok
CREATE TYPE udt AS ENUM ('a', 'b', 'c');

statement ok
CREATE FUNCTION f_udt() RETURNS udt AS
$$
SELECT 'a'::udt;
$$ LANGUAGE SQL;

query T
SELECT * FROM f_udt();
----
a

statement ok
CREATE FUNCTION f_udt_record() RETURNS RECORD AS
$$
SELECT 'a'::udt;
$$ LANGUAGE SQL;

query T
SELECT * FROM f_udt() AS foo(u udt);
----
a

query II rowsort
SELECT * FROM f_setof() AS foo(a INT, b INT);
----
1 5
2 6
3 7

statement ok
CREATE FUNCTION f_setof_imp() RETURNS SETOF t_imp STABLE AS
$$
SELECT * FROM t_imp;
$$ LANGUAGE SQL;

query II rowsort
SELECT * FROM f_setof_imp()
----
1 10
2 4
3 32

statement ok
CREATE FUNCTION f_strict() RETURNS RECORD STRICT AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query III
SELECT * FROM f_strict() AS foo(a INT, b INT, c INT);
----
1 2 3

statement ok
CREATE FUNCTION f_setof_strict() RETURNS SETOF RECORD STRICT STABLE AS
$$
SELECT * FROM t_imp;
$$ LANGUAGE SQL;

query II rowsort
SELECT * FROM f_setof_strict() AS foo(a INT, b INT);
----
1 10
2 4
3 32

statement ok
CREATE FUNCTION f_strict_arg(IN a INT, IN b INT) RETURNS RECORD STRICT STABLE AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_strict_arg(1,2) AS foo(a INT, b INT);
----
a b
1 2

query II colnames
SELECT * FROM f_strict_arg(NULL, 2) AS foo(a INT, b INT);
----
a b
NULL NULL

query T
SELECT * FROM (SELECT f_strict_arg(1, 2));
----
(1,2)

statement ok
CREATE FUNCTION f_strict_arg_setof(IN a INT, IN b INT) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b FROM generate_series(1,3);
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_strict_arg_setof(1,2) AS foo(a INT, b INT);
----
a b
1 2
1 2
1 2

# Strict SETOF UDF with NULL input returns 0 rows.
query II colnames
SELECT * FROM f_strict_arg_setof(NULL,2) AS foo(a INT, b INT);
----
a b

statement ok
CREATE TABLE n (a INT PRIMARY KEY, b INT);
INSERT INTO n VALUES (1, 5), (2, NULL);

query III colnames
WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg(narg.input, 2) AS foo(a INT, b INT);
----
input a b
NULL NULL NULL

query III colnames
WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg_SETOF(narg.input, 2) AS foo(a INT, b INT);
----
input a b

statement ok
CREATE FUNCTION f_arg(IN a INT8, IN b INT8) RETURNS RECORD AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

query II
SELECT * FROM f_arg(1,2) AS foo(a INT, b INT);
----
1 2

# Test ambiguous function signatures with records
subtest ambiguity

# setof
statement ok
CREATE FUNCTION f_amb_setof(a INT8, b INT8) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

statement ok
CREATE FUNCTION f_amb_setof(a INT8, b STRING) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

# TODO(harding): In postgres, calls to f_amb_setof should succeed and return 0 rows.
statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are
SELECT f_amb_setof(1, NULL);

statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are
SELECT * FROM f_amb_setof(1, NULL) as foo(a int, b int);

statement ok
CREATE FUNCTION f_amb(a INT, b INT) RETURNS RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

statement ok
CREATE FUNCTION f_amb(a INT, b STRING) RETURNS RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

# TODO(harding): In postgres, calls to f_amb should succeed and return NULL.
statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are
SELECT f_amb(1, NULL);

# TODO(harding): In postgres, calls to f_amb as a data source should succeed
# and return NULL NULL.
statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are
SELECT * FROM f_amb(1, NULL) as foo(a int, b int);
35 changes: 23 additions & 12 deletions pkg/sql/logictest/testdata/logic_test/udf_setof
Original file line number Diff line number Diff line change
Expand Up @@ -166,25 +166,36 @@ CREATE FUNCTION all_ab() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_tuple() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT (a, b) FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab_tuple()
----
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_record() RETURNS SETOF RECORD LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

query II rowsort
SELECT * FROM all_ab_tuple()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40
8 changes: 8 additions & 0 deletions pkg/sql/opt/exec/execbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,8 @@ func (b *Builder) buildExistsSubquery(
types.Bool,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
),
tree.DBoolFalse,
}, types.Bool), nil
Expand Down Expand Up @@ -803,6 +805,8 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
), nil
}

Expand Down Expand Up @@ -855,6 +859,8 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
), nil
}

Expand Down Expand Up @@ -937,6 +943,8 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
udf.Typ,
enableStepping,
udf.CalledOnNullInput,
udf.MultiColDataSource,
udf.SetReturning,
), nil
}

Expand Down
8 changes: 7 additions & 1 deletion pkg/sql/opt/norm/inline_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ func (c *CustomFuncs) InlineConstVar(f memo.FiltersExpr) memo.FiltersExpr {
// 2. It has a single statement.
// 3. It is not a set-returning function.
// 4. Its arguments are only Variable or Const expressions.
// 5. It is not a record-returning function.
//
// UDFs with mutations (INSERT, UPDATE, UPSERT, DELETE) cannot be inlined, but
// we do not need an explicit check for this because immutable UDFs cannot
Expand All @@ -434,8 +435,13 @@ func (c *CustomFuncs) InlineConstVar(f memo.FiltersExpr) memo.FiltersExpr {
// referenced in the UDF body more than once. Any argument that contains a
// subquery and is referenced multiple times cannot be inlined, unless new
// columns IDs for the entire subquery are generated (see #100915).
//
// TODO(harding): We could potentially loosen (5), since only record-returning
// UDFs used as data sources return multiple columns. Other UDFs returning a
// single column can be inlined since subqueries can only return a single
// column.
func (c *CustomFuncs) IsInlinableUDF(args memo.ScalarListExpr, udfp *memo.UDFPrivate) bool {
if udfp.Volatility == volatility.Volatile || len(udfp.Body) != 1 || udfp.SetReturning {
if udfp.Volatility == volatility.Volatile || len(udfp.Body) != 1 || udfp.SetReturning || udfp.MultiColDataSource {
return false
}
if !args.IsConstantsAndPlaceholdersAndVariables() {
Expand Down
Loading

0 comments on commit a2f31ca

Please sign in to comment.