From 4539072e48942b77721b1e3044235c1013266dc4 Mon Sep 17 00:00:00 2001
From: rharding6373 <rharding6373@users.noreply.github.com>
Date: Thu, 23 Feb 2023 13:08:25 -0800
Subject: [PATCH] sql: fix record-returning udfs when used as data source

When record-returning UDFs (both implicit and `RECORD` return types) are
used as a data source in a query, the result should be treated as a row
with separate columns instead of a tuple, which is how UDF output is
normally treated. This PR closes this gap between CRDB and Postgres.

For example:

```
CREATE FUNCTION f() RETURNS RECORD AS
$$
  SELECT 1, 2, 3;
$$ LANGUAGE SQL

SELECT f()
    f
--------
 (1,2,3)

SELECT * FROM f() as foo(a int, b int, c int);
 a | b | c
---+---+---
 1 | 2 | 3
```

The behavior is the same for implicit record return types.

Epic: CRDB-19496
Fixes: #97059

Release note (bug fix): Fixes the behavior of UDFs to return its results
as a row instead of a tuple when UDFs are called in a query as a data
source. This is now compatible with postgres behavior.
---
 pkg/sql/logictest/testdata/logic_test/enums   |   2 +-
 .../logictest/testdata/logic_test/udf_record  | 254 +++++++++++++++++-
 .../logictest/testdata/logic_test/udf_setof   |  35 ++-
 pkg/sql/opt/exec/execbuilder/scalar.go        |   8 +
 pkg/sql/opt/norm/inline_funcs.go              |   8 +-
 pkg/sql/opt/norm/testdata/rules/inline        |  24 +-
 pkg/sql/opt/norm/testdata/rules/udf           |  31 +--
 pkg/sql/opt/ops/scalar.opt                    |   5 +
 pkg/sql/opt/optbuilder/builder.go             |   3 +
 pkg/sql/opt/optbuilder/scalar.go              |  99 +++++--
 pkg/sql/opt/optbuilder/select.go              |  12 +-
 pkg/sql/opt/optbuilder/srfs.go                |   4 +-
 pkg/sql/opt/optbuilder/testdata/udf           |  67 +++++
 pkg/sql/opt/optbuilder/util.go                |   2 +-
 pkg/sql/routine.go                            |  13 +-
 pkg/sql/rowexec/project_set.go                |   8 +
 pkg/sql/sem/builtins/generator_builtins.go    |  14 +
 pkg/sql/sem/eval/generators.go                |   2 +-
 pkg/sql/sem/tree/expr.go                      |   4 +
 pkg/sql/sem/tree/routine.go                   |  10 +
 pkg/sql/sem/tree/type_check.go                |   7 +-
 21 files changed, 535 insertions(+), 77 deletions(-)

diff --git a/pkg/sql/logictest/testdata/logic_test/enums b/pkg/sql/logictest/testdata/logic_test/enums
index bcfe55675a0f..446aa2c02acf 100644
--- a/pkg/sql/logictest/testdata/logic_test/enums
+++ b/pkg/sql/logictest/testdata/logic_test/enums
@@ -1768,7 +1768,7 @@ $$;
 query TT
 SELECT "🙏"('😊'), "🙏"(NULL:::"Emoji 😉")
 ----
-NULL  NULL
+NULL  mixed
 
 statement ok
 CREATE DATABASE "DB➕➕";
diff --git a/pkg/sql/logictest/testdata/logic_test/udf_record b/pkg/sql/logictest/testdata/logic_test/udf_record
index 74c7b7efa966..2bc96e48f670 100644
--- a/pkg/sql/logictest/testdata/logic_test/udf_record
+++ b/pkg/sql/logictest/testdata/logic_test/udf_record
@@ -13,15 +13,13 @@ SELECT f_one();
 ----
 (1)
 
-# TODO(#97059): The following query should require a column definition list.
-statement ok
+statement error pq: a column definition list is required for functions returning \"record\"
 SELECT * FROM f_one();
 
-# TODO(#97059): The following query should produce a row, not a tuple.
-query T
+query I
 SELECT * FROM f_one() AS foo (a INT);
 ----
-(1)
+1
 
 statement ok
 CREATE FUNCTION f_const() RETURNS RECORD AS
@@ -35,7 +33,7 @@ SELECT f_const();
 (1,2.0,"welcome roacher","2021-07-12 17:02:10+00")
 
 statement ok
-CREATE FUNCTION f_arr() RETURNS RECORD AS
+CREATE FUNCTION f_arr() RETURNS RECORD STABLE AS
 $$
   SELECT ARRAY[1, 2, 3];
 $$ LANGUAGE SQL;
@@ -92,7 +90,7 @@ SELECT f_table();
 (1,5)
 
 statement ok
-CREATE FUNCTION f_multitable() RETURNS RECORD AS
+CREATE FUNCTION f_multitable() RETURNS RECORD STABLE AS
 $$
   SELECT t1.*, t2.* FROM t as t1 JOIN t as t2 on t1.a = t2.a ORDER BY t1.a LIMIT 1;
 $$ LANGUAGE SQL;
@@ -130,3 +128,245 @@ query T
 SELECT f_table();
 ----
 (1,5)
+
+subtest datasource
+
+statement ok
+CREATE FUNCTION f_tup() RETURNS RECORD AS
+$$
+  SELECT ROW(1, 2, 3);
+$$ LANGUAGE SQL;
+
+query T
+SELECT f_tup();
+----
+(1,2,3)
+
+statement error pq: a column definition list is required for functions returning \"record\"
+SELECT * FROM f_tup();
+
+query III colnames
+SELECT * FROM f_tup() as foo(a int, b int, c int);
+----
+a b c
+1 2 3
+
+statement ok
+CREATE FUNCTION f_col() RETURNS RECORD AS
+$$
+  SELECT 1, 2, 3;
+$$ LANGUAGE SQL;
+
+query T
+SELECT f_col();
+----
+(1,2,3)
+
+query III colnames
+SELECT * FROM f_col() as foo(a int, b int, c int);
+----
+a b c
+1 2 3
+
+query T
+SELECT * FROM (VALUES (f_col())) as foo;
+----
+(1,2,3)
+
+statement ok
+CREATE TABLE t_imp (a INT PRIMARY KEY, b INT);
+INSERT INTO t_imp VALUES (1, 10), (2, 4), (3, 32);
+
+statement ok
+CREATE FUNCTION f_imp() RETURNS t_imp AS
+$$
+  SELECT * FROM t_imp ORDER BY a LIMIT 1;
+$$ LANGUAGE SQL;
+
+query II colnames
+SELECT * FROM f_imp();
+----
+a b
+1 10
+
+statement ok
+CREATE TYPE udt AS ENUM ('a', 'b', 'c');
+
+statement ok
+CREATE FUNCTION f_udt() RETURNS udt AS
+$$
+  SELECT 'a'::udt;
+$$ LANGUAGE SQL;
+
+query T
+SELECT * FROM f_udt();
+----
+a
+
+statement ok
+CREATE FUNCTION f_udt_record() RETURNS RECORD AS
+$$
+  SELECT 'a'::udt;
+$$ LANGUAGE SQL;
+
+query T
+SELECT * FROM f_udt() AS foo(u udt);
+----
+a
+
+query II rowsort
+SELECT * FROM f_setof() AS foo(a INT, b INT);
+----
+1 5
+2 6
+3 7
+
+statement ok
+CREATE FUNCTION f_setof_imp() RETURNS SETOF t_imp STABLE AS
+$$
+  SELECT * FROM t_imp;
+$$ LANGUAGE SQL;
+
+query II rowsort
+SELECT * FROM f_setof_imp()
+----
+1 10
+2 4
+3 32
+
+statement ok
+CREATE FUNCTION f_strict() RETURNS RECORD STRICT AS
+$$
+  SELECT 1, 2, 3;
+$$ LANGUAGE SQL;
+
+query III
+SELECT * FROM f_strict() AS foo(a INT, b INT, c INT);
+----
+1 2 3
+
+statement ok
+CREATE FUNCTION f_setof_strict() RETURNS SETOF RECORD STRICT STABLE AS
+$$
+  SELECT * FROM t_imp;
+$$ LANGUAGE SQL;
+
+query II rowsort
+SELECT * FROM f_setof_strict() AS foo(a INT, b INT);
+----
+1 10
+2 4
+3 32
+
+statement ok
+CREATE FUNCTION f_strict_arg(IN a INT, IN b INT) RETURNS RECORD STRICT STABLE AS
+$$
+  SELECT a, b;
+$$ LANGUAGE SQL;
+
+query II colnames
+SELECT * FROM f_strict_arg(1,2) AS foo(a INT, b INT);
+----
+a b
+1 2
+
+query II colnames
+SELECT * FROM f_strict_arg(NULL, 2) AS foo(a INT, b INT);
+----
+a    b
+NULL NULL
+
+query T
+SELECT * FROM (SELECT f_strict_arg(1, 2));
+----
+(1,2)
+
+statement ok
+CREATE FUNCTION f_strict_arg_setof(IN a INT, IN b INT) RETURNS SETOF RECORD STRICT AS
+$$
+  SELECT a, b FROM generate_series(1,3);
+$$ LANGUAGE SQL;
+
+query II colnames
+SELECT * FROM f_strict_arg_setof(1,2) AS foo(a INT, b INT);
+----
+a b
+1 2
+1 2
+1 2
+
+# Strict SETOF UDF with NULL input returns 0 rows.
+query II colnames
+SELECT * FROM f_strict_arg_setof(NULL,2) AS foo(a INT, b INT);
+----
+a b
+
+statement ok
+CREATE TABLE n (a INT PRIMARY KEY, b INT);
+INSERT INTO n VALUES (1, 5), (2, NULL);
+
+query III colnames
+WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg(narg.input, 2) AS foo(a INT, b INT);
+----
+input a    b
+NULL NULL NULL
+
+query III colnames
+WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg_SETOF(narg.input, 2) AS foo(a INT, b INT);
+----
+input a b
+
+statement ok
+CREATE FUNCTION f_arg(IN a INT8, IN b INT8) RETURNS RECORD AS
+$$
+  SELECT a, b;
+$$ LANGUAGE SQL;
+
+query II
+SELECT * FROM f_arg(1,2) AS foo(a INT, b INT);
+----
+1 2
+
+# Test ambiguous function signatures with records
+subtest ambiguity
+
+# setof
+statement ok
+CREATE FUNCTION f_amb_setof(a INT8, b INT8) RETURNS SETOF RECORD STRICT AS
+$$
+  SELECT a, b;
+$$ LANGUAGE SQL;
+
+statement ok
+CREATE FUNCTION f_amb_setof(a INT8, b STRING) RETURNS SETOF RECORD STRICT AS
+$$
+  SELECT a, b;
+$$ LANGUAGE SQL;
+
+# TODO(harding): In postgres, calls to f_amb_setof should succeed and return 0 rows.
+statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are
+SELECT f_amb_setof(1, NULL);
+
+statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are
+SELECT * FROM f_amb_setof(1, NULL) as foo(a int, b int);
+
+statement ok
+CREATE FUNCTION f_amb(a INT, b INT) RETURNS RECORD STRICT AS
+$$
+  SELECT a, b;
+$$ LANGUAGE SQL;
+
+statement ok
+CREATE FUNCTION f_amb(a INT, b STRING) RETURNS RECORD STRICT AS
+$$
+  SELECT a, b;
+$$ LANGUAGE SQL;
+
+# TODO(harding): In postgres, calls to f_amb should succeed and return NULL.
+statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are
+SELECT f_amb(1, NULL);
+
+# TODO(harding): In postgres, calls to f_amb as a data source should succeed
+# and return NULL NULL.
+statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are
+SELECT * FROM f_amb(1, NULL) as foo(a int, b int);
diff --git a/pkg/sql/logictest/testdata/logic_test/udf_setof b/pkg/sql/logictest/testdata/logic_test/udf_setof
index 85a67877da44..5140b407f7ff 100644
--- a/pkg/sql/logictest/testdata/logic_test/udf_setof
+++ b/pkg/sql/logictest/testdata/logic_test/udf_setof
@@ -166,25 +166,36 @@ CREATE FUNCTION all_ab() RETURNS SETOF ab LANGUAGE SQL AS $$
   SELECT a, b FROM ab
 $$
 
-# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
-query T rowsort
+query II rowsort
 SELECT * FROM all_ab()
 ----
-(1,10)
-(2,20)
-(3,30)
-(4,40)
+1 10
+2 20
+3 30
+4 40
 
 statement ok
 CREATE FUNCTION all_ab_tuple() RETURNS SETOF ab LANGUAGE SQL AS $$
   SELECT (a, b) FROM ab
 $$
 
-# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
-query T rowsort
+query II rowsort
+SELECT * FROM all_ab_tuple()
+----
+1 10
+2 20
+3 30
+4 40
+
+statement ok
+CREATE FUNCTION all_ab_record() RETURNS SETOF RECORD LANGUAGE SQL AS $$
+  SELECT a, b FROM ab
+$$
+
+query II rowsort
 SELECT * FROM all_ab_tuple()
 ----
-(1,10)
-(2,20)
-(3,30)
-(4,40)
+1 10
+2 20
+3 30
+4 40
diff --git a/pkg/sql/opt/exec/execbuilder/scalar.go b/pkg/sql/opt/exec/execbuilder/scalar.go
index 4847a882867e..2de5e2a82f7b 100644
--- a/pkg/sql/opt/exec/execbuilder/scalar.go
+++ b/pkg/sql/opt/exec/execbuilder/scalar.go
@@ -695,6 +695,8 @@ func (b *Builder) buildExistsSubquery(
 				types.Bool,
 				false, /* enableStepping */
 				true,  /* calledOnNullInput */
+				false, /* multiColOutput */
+				false, /* generator */
 			),
 			tree.DBoolFalse,
 		}, types.Bool), nil
@@ -803,6 +805,8 @@ func (b *Builder) buildSubquery(
 			subquery.Typ,
 			false, /* enableStepping */
 			true,  /* calledOnNullInput */
+			false, /* multiColOutput */
+			false, /* generator */
 		), nil
 	}
 
@@ -855,6 +859,8 @@ func (b *Builder) buildSubquery(
 			subquery.Typ,
 			false, /* enableStepping */
 			true,  /* calledOnNullInput */
+			false, /* multiColOutput */
+			false, /* generator */
 		), nil
 	}
 
@@ -937,6 +943,8 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
 		udf.Typ,
 		enableStepping,
 		udf.CalledOnNullInput,
+		udf.MultiColDataSource,
+		udf.SetReturning,
 	), nil
 }
 
diff --git a/pkg/sql/opt/norm/inline_funcs.go b/pkg/sql/opt/norm/inline_funcs.go
index 8cac41f2d4f0..1c83b24ec723 100644
--- a/pkg/sql/opt/norm/inline_funcs.go
+++ b/pkg/sql/opt/norm/inline_funcs.go
@@ -411,6 +411,7 @@ func (c *CustomFuncs) InlineConstVar(f memo.FiltersExpr) memo.FiltersExpr {
 //  2. It has a single statement.
 //  3. It is not a set-returning function.
 //  4. Its arguments are only Variable or Const expressions.
+//  5. It is not a record-returning function.
 //
 // UDFs with mutations (INSERT, UPDATE, UPSERT, DELETE) cannot be inlined, but
 // we do not need an explicit check for this because immutable UDFs cannot
@@ -434,8 +435,13 @@ func (c *CustomFuncs) InlineConstVar(f memo.FiltersExpr) memo.FiltersExpr {
 // referenced in the UDF body more than once. Any argument that contains a
 // subquery and is referenced multiple times cannot be inlined, unless new
 // columns IDs for the entire subquery are generated (see #100915).
+//
+// TODO(harding): We could potentially loosen (5), since only record-returning
+// UDFs used as data sources return multiple columns. Other UDFs returning a
+// single column can be inlined since subqueries can only return a single
+// column.
 func (c *CustomFuncs) IsInlinableUDF(args memo.ScalarListExpr, udfp *memo.UDFPrivate) bool {
-	if udfp.Volatility == volatility.Volatile || len(udfp.Body) != 1 || udfp.SetReturning {
+	if udfp.Volatility == volatility.Volatile || len(udfp.Body) != 1 || udfp.SetReturning || udfp.MultiColDataSource {
 		return false
 	}
 	if !args.IsConstantsAndPlaceholdersAndVariables() {
diff --git a/pkg/sql/opt/norm/testdata/rules/inline b/pkg/sql/opt/norm/testdata/rules/inline
index 5f3c698afa1f..9d0b51692b9f 100644
--- a/pkg/sql/opt/norm/testdata/rules/inline
+++ b/pkg/sql/opt/norm/testdata/rules/inline
@@ -1327,7 +1327,7 @@ project
                 └── plus [type=int]
                      ├── variable: x:17 [type=int]
                      └── variable: y:18 [type=int]
-          ] [17 18] int false immutable true} [as=add:20, outer=(2), immutable, correlated-subquery, udf]
+          ] [17 18] int false immutable true false} [as=add:20, outer=(2), immutable, correlated-subquery, udf]
            ├── 10
            └── subquery
                 └── values
@@ -1579,7 +1579,7 @@ project
                      └── eq [type=bool]
                           ├── variable: y:10 [type=int]
                           └── const: 2 [type=int]
-          ] [9 10] bool false immutable true} [as=x_0_1_y_2:12, outer=(2), immutable, correlated-subquery, udf]
+          ] [9 10] bool false immutable true false} [as=x_0_1_y_2:12, outer=(2), immutable, correlated-subquery, udf]
            ├── subquery
            │    └── max1-row
            │         ├── columns: column1:8!null
@@ -1600,3 +1600,23 @@ project
            │              └── filters
            │                   └── column1:8 > i:2 [outer=(2,8), constraints=(/2: (/NULL - ]; /8: (/NULL - ])]
            └── 10
+
+exec-ddl
+CREATE FUNCTION x_y(x INT, y INT) RETURNS RECORD IMMUTABLE LANGUAGE SQL AS $$
+  SELECT x, y;
+$$
+----
+
+# Do not inline a UDF that outputs multiple columns.
+norm expect-not=InlineUDF
+SELECT * FROM x_y(1, 2) AS f(x INT, y INT);
+----
+project-set
+ ├── columns: x:5 y:6
+ ├── immutable
+ ├── values
+ │    ├── cardinality: [1 - 1]
+ │    ├── key: ()
+ │    └── ()
+ └── zip
+      └── x_y(1, 2) [immutable, udf]
diff --git a/pkg/sql/opt/norm/testdata/rules/udf b/pkg/sql/opt/norm/testdata/rules/udf
index 56e432053bfe..cc729d7952b5 100644
--- a/pkg/sql/opt/norm/testdata/rules/udf
+++ b/pkg/sql/opt/norm/testdata/rules/udf
@@ -24,7 +24,7 @@ values
                           └── const: 1
 
 exec-ddl
-CREATE FUNCTION strict_fn(i INT, t TEXT, b BOOl) RETURNS INT STRICT LANGUAGE SQL AS 'SELECT i'
+CREATE FUNCTION strict_fn(i INT, t TEXT, b BOOl) RETURNS INT STRICT IMMUTABLE LANGUAGE SQL AS 'SELECT i'
 ----
 
 # Strict UDFs should be folded to NULL in the presence of a constant NULL
@@ -33,10 +33,10 @@ norm format=show-scalars
 SELECT strict_fn(1, 'foo', NULL)
 ----
 values
- ├── columns: strict_fn:1
+ ├── columns: strict_fn:5
  ├── cardinality: [1 - 1]
  ├── key: ()
- ├── fd: ()-->(1)
+ ├── fd: ()-->(5)
  └── tuple
       └── null
 
@@ -48,23 +48,14 @@ SELECT strict_fn(1, 'foo', true)
 values
  ├── columns: strict_fn:5
  ├── cardinality: [1 - 1]
- ├── volatile
  ├── key: ()
  ├── fd: ()-->(5)
  └── tuple
-      └── udf: strict_fn
-           ├── strict
-           ├── params: i:1 t:2 b:3
-           ├── args
-           │    ├── const: 1
-           │    ├── const: 'foo'
-           │    └── true
-           └── body
-                └── values
-                     ├── columns: i:4
-                     ├── outer: (1)
-                     ├── cardinality: [1 - 1]
-                     ├── key: ()
-                     ├── fd: ()-->(4)
-                     └── tuple
-                          └── variable: i:1
+      └── subquery
+           └── values
+                ├── columns: i:4!null
+                ├── cardinality: [1 - 1]
+                ├── key: ()
+                ├── fd: ()-->(4)
+                └── tuple
+                     └── const: 1
diff --git a/pkg/sql/opt/ops/scalar.opt b/pkg/sql/opt/ops/scalar.opt
index eb39882cee14..827a68aec1bf 100644
--- a/pkg/sql/opt/ops/scalar.opt
+++ b/pkg/sql/opt/ops/scalar.opt
@@ -1282,6 +1282,11 @@ define UDFPrivate {
     # operators. Non-scalar UDFs are always in a project-set operator, while
     # scalar UDFs can be if used as a data source (e.g. SELECT * FROM udf()).
     CalledOnNullInput bool
+
+    # MultiColDataSource is true if the function may return multiple columns. This
+    # is only the case if the UDF returns a RECORD type and is used as a data
+    # source.
+    MultiColDataSource bool
 }
 
 # KVOptions is a set of KVOptionItems that specify arbitrary keys and values
diff --git a/pkg/sql/opt/optbuilder/builder.go b/pkg/sql/opt/optbuilder/builder.go
index 61f5ec53978f..835085027b8e 100644
--- a/pkg/sql/opt/optbuilder/builder.go
+++ b/pkg/sql/opt/optbuilder/builder.go
@@ -137,6 +137,9 @@ type Builder struct {
 	// within.
 	insideUDF bool
 
+	// insideDataSource is true when we are processing a data source.
+	insideDataSource bool
+
 	// If set, we are collecting view dependencies in schemaDeps. This can only
 	// happen inside view/function definitions.
 	//
diff --git a/pkg/sql/opt/optbuilder/scalar.go b/pkg/sql/opt/optbuilder/scalar.go
index 87a1fbe9fe20..665bd81fc96f 100644
--- a/pkg/sql/opt/optbuilder/scalar.go
+++ b/pkg/sql/opt/optbuilder/scalar.go
@@ -695,6 +695,7 @@ func (b *Builder) buildUDF(
 	// We'll need to track the depth of the UDFs we are building expressions
 	// within.
 	b.insideUDF = true
+	isMultiColDataSource := false
 	for i := range stmts {
 		stmtScope := b.buildStmt(stmts[i].AST, nil /* desiredTypes */, bodyScope)
 		expr := stmtScope.expr
@@ -715,34 +716,69 @@ func (b *Builder) buildUDF(
 				physProps.Ordering = props.OrderingChoice{}
 			}
 
-			// Replace the tuple contents of RECORD return types from Any to the
-			// result columns of the last statement. If the result column is a tuple,
-			// then use its tuple contents for the return instead.
+			// If returning a RECORD type, the function return type needs to be
+			// modified because when we first parse the CREATE FUNCTION, the RECORD
+			// is represented as a tuple with any types and execution requires the
+			// types to be concrete in order to decode them correctly. We can
+			// determine the types from the result columns or tuple of the last
+			// statement.
 			isSingleTupleResult := len(stmtScope.cols) == 1 && stmtScope.cols[0].typ.Family() == types.TupleFamily
-			if types.IsRecordType(f.ResolvedType()) {
+			if types.IsRecordType(rtyp) {
 				if isSingleTupleResult {
-					f.ResolvedType().InternalType.TupleContents = stmtScope.cols[0].typ.TupleContents()
+					// When the final statement returns a single tuple, we can use the
+					// tuple's types as the function return type.
+					rtyp = stmtScope.cols[0].typ
 				} else {
+					// Get the types from the individual columns of the last statement.
 					tc := make([]*types.T, len(stmtScope.cols))
+					tl := make([]string, len(stmtScope.cols))
 					for i, col := range stmtScope.cols {
 						tc[i] = col.typ
+						tl[i] = col.name.MetadataName()
 					}
-					f.ResolvedType().InternalType.TupleContents = tc
+					rtyp = types.MakeLabeledTuple(tc, tl)
 				}
+				f.SetTypeAnnotation(rtyp)
 			}
 
-			// If there are multiple output columns or the output type is a record and
-			// the output column is not a tuple, we must combine them into a tuple -
-			// only a single column can be returned from a UDF.
+			// Only a single column can be returned from a UDF, unless it is used as a
+			// data source. Data sources may output multiple columns, and if the
+			// statement body produces a tuple it needs to be expanded into columns.
+			// When not used as a data source, combine statements producing multiple
+			// columns into a tuple. If the last statement is already returning a
+			// tuple and the function has a record return type, then we do not need to
+			// wrap the output in another tuple.
 			cols := physProps.Presentation
-			if len(cols) > 1 || (types.IsRecordType(f.ResolvedType()) && !isSingleTupleResult) {
+			if b.insideDataSource && rtyp.Family() == types.TupleFamily {
+				// When the UDF is used as a data source and expects to output a tuple
+				// type, its output needs to be a row of columns instead of the usual
+				// tuple. If the last statement output a tuple, we need to expand the
+				// tuple into individual columns.
+				isMultiColDataSource = true
+				if isSingleTupleResult {
+					stmtScope = bodyScope.push()
+					elems := make([]scopeColumn, len(rtyp.TupleContents()))
+					for i := range rtyp.TupleContents() {
+						e := b.factory.ConstructColumnAccess(b.factory.ConstructVariable(cols[0].ID), memo.TupleOrdinal(i))
+						col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp.TupleContents()[i], nil, e)
+						elems[i] = *col
+					}
+					expr = b.constructProject(expr, elems)
+					physProps = stmtScope.makePhysicalProps()
+				}
+			} else if len(cols) > 1 || (types.IsRecordType(rtyp) && !isSingleTupleResult) {
+				// Only a single column can be returned from a UDF, unless it is used as a
+				// data source (see comment above). If there are multiple columns, combine
+				// them into a tuple. If the last statement is already returning a tuple
+				// and the function has a record return type, then do not wrap the
+				// output in another tuple.
 				elems := make(memo.ScalarListExpr, len(cols))
 				for i := range cols {
 					elems[i] = b.factory.ConstructVariable(cols[i].ID)
 				}
-				tup := b.factory.ConstructTuple(elems, f.ResolvedType())
+				tup := b.factory.ConstructTuple(elems, rtyp)
 				stmtScope = bodyScope.push()
-				col := b.synthesizeColumn(stmtScope, scopeColName(""), f.ResolvedType(), nil /* expr */, tup)
+				col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp, nil /* expr */, tup)
 				expr = b.constructProject(expr, []scopeColumn{*col})
 				physProps = stmtScope.makePhysicalProps()
 			}
@@ -755,17 +791,17 @@ func (b *Builder) buildUDF(
 			// column is already a tuple.
 			returnCol := physProps.Presentation[0].ID
 			returnColMeta := b.factory.Metadata().ColumnMeta(returnCol)
-			if !types.IsRecordType(f.ResolvedType()) && !returnColMeta.Type.Identical(f.ResolvedType()) {
-				if !cast.ValidCast(returnColMeta.Type, f.ResolvedType(), cast.ContextAssignment) {
+			if !types.IsRecordType(rtyp) && !isMultiColDataSource && !returnColMeta.Type.Identical(rtyp) {
+				if !cast.ValidCast(returnColMeta.Type, rtyp, cast.ContextAssignment) {
 					panic(sqlerrors.NewInvalidAssignmentCastError(
-						returnColMeta.Type, f.ResolvedType(), returnColMeta.Alias))
+						returnColMeta.Type, rtyp, returnColMeta.Alias))
 				}
 				cast := b.factory.ConstructAssignmentCast(
 					b.factory.ConstructVariable(physProps.Presentation[0].ID),
-					f.ResolvedType(),
+					rtyp,
 				)
 				stmtScope = bodyScope.push()
-				col := b.synthesizeColumn(stmtScope, scopeColName(""), f.ResolvedType(), nil /* expr */, cast)
+				col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp, nil /* expr */, cast)
 				expr = b.constructProject(expr, []scopeColumn{*col})
 				physProps = stmtScope.makePhysicalProps()
 			}
@@ -781,20 +817,29 @@ func (b *Builder) buildUDF(
 	out = b.factory.ConstructUDF(
 		args,
 		&memo.UDFPrivate{
-			Name:              def.Name,
-			Params:            params,
-			Body:              rels,
-			Typ:               f.ResolvedType(),
-			SetReturning:      isSetReturning,
-			Volatility:        o.Volatility,
-			CalledOnNullInput: o.CalledOnNullInput,
+			Name:               def.Name,
+			Params:             params,
+			Body:               rels,
+			Typ:                f.ResolvedType(),
+			SetReturning:       isSetReturning,
+			Volatility:         o.Volatility,
+			CalledOnNullInput:  o.CalledOnNullInput,
+			MultiColDataSource: isMultiColDataSource,
 		},
 	)
 
-	// Synthesize an output column for set-returning UDFs.
-	if isSetReturning && outCol == nil {
-		outCol = b.synthesizeColumn(outScope, scopeColName(""), f.ResolvedType(), nil /* expr */, out)
+	// Synthesize an output columns if necessary.
+	if outCol == nil {
+		if isMultiColDataSource {
+			// TODO(harding): Add the returns record property during create function.
+			f.ResolvedOverload().ReturnsRecordType = types.IsRecordType(rtyp)
+			return b.finishBuildGeneratorFunction(f, f.ResolvedOverload(), out, inScope, outScope, outCol)
+		}
+		if outScope != nil {
+			outCol = b.synthesizeColumn(outScope, scopeColName(""), f.ResolvedType(), nil /* expr */, out)
+		}
 	}
+
 	return b.finishBuildScalar(f, out, inScope, outScope, outCol)
 }
 
diff --git a/pkg/sql/opt/optbuilder/select.go b/pkg/sql/opt/optbuilder/select.go
index 1a3806d6fcc7..124454b34396 100644
--- a/pkg/sql/opt/optbuilder/select.go
+++ b/pkg/sql/opt/optbuilder/select.go
@@ -50,10 +50,12 @@ import (
 func (b *Builder) buildDataSource(
 	texpr tree.TableExpr, indexFlags *tree.IndexFlags, locking lockingSpec, inScope *scope,
 ) (outScope *scope) {
-	defer func(prevAtRoot bool) {
+	defer func(prevAtRoot bool, prevInsideDataSource bool) {
 		inScope.atRoot = prevAtRoot
-	}(inScope.atRoot)
+		b.insideDataSource = prevInsideDataSource
+	}(inScope.atRoot, b.insideDataSource)
 	inScope.atRoot = false
+	b.insideDataSource = true
 	// NB: The case statements are sorted lexicographically.
 	switch source := (texpr).(type) {
 	case *tree.AliasedTableExpr:
@@ -943,6 +945,12 @@ func (b *Builder) buildWithOrdinality(inScope *scope) (outScope *scope) {
 func (b *Builder) buildSelectStmt(
 	stmt tree.SelectStatement, locking lockingSpec, desiredTypes []*types.T, inScope *scope,
 ) (outScope *scope) {
+	// The top level in a select statement is not considered a data source.
+	oldInsideDataSource := b.insideDataSource
+	defer func() {
+		b.insideDataSource = oldInsideDataSource
+	}()
+	b.insideDataSource = false
 	// NB: The case statements are sorted lexicographically.
 	switch stmt := stmt.(type) {
 	case *tree.LiteralValuesClause:
diff --git a/pkg/sql/opt/optbuilder/srfs.go b/pkg/sql/opt/optbuilder/srfs.go
index a93cdc802304..039e79ab5959 100644
--- a/pkg/sql/opt/optbuilder/srfs.go
+++ b/pkg/sql/opt/optbuilder/srfs.go
@@ -111,7 +111,9 @@ func (b *Builder) buildZip(exprs tree.Exprs, inScope *scope) (outScope *scope) {
 		var outCol *scopeColumn
 		startCols := len(outScope.cols)
 
-		if def == nil || funcExpr.ResolvedOverload().Class != tree.GeneratorClass || b.shouldCreateDefaultColumn(texpr) {
+		isRecordReturningUDF := def != nil && funcExpr.ResolvedOverload().IsUDF && texpr.ResolvedType().Family() == types.TupleFamily && b.insideDataSource
+
+		if def == nil || (funcExpr.ResolvedOverload().Class != tree.GeneratorClass && !isRecordReturningUDF) || (b.shouldCreateDefaultColumn(texpr) && !isRecordReturningUDF) {
 			if def != nil && len(funcExpr.ResolvedOverload().ReturnLabels) > 0 {
 				// Override the computed alias with the one defined in the ReturnLabels. This
 				// satisfies a Postgres quirk where some json functions use different labels
diff --git a/pkg/sql/opt/optbuilder/testdata/udf b/pkg/sql/opt/optbuilder/testdata/udf
index c2121596d274..6e8ca2d3a858 100644
--- a/pkg/sql/opt/optbuilder/testdata/udf
+++ b/pkg/sql/opt/optbuilder/testdata/udf
@@ -1166,6 +1166,73 @@ project
                      └── tuple
                           └── const: 1
 
+exec-ddl
+CREATE FUNCTION strict_fn_record(i INT, t TEXT, b BOOl) RETURNS RECORD STRICT LANGUAGE SQL AS 'SELECT i, t, b'
+----
+
+build format=show-scalars
+SELECT * FROM strict_fn_record(1, 'foo', false) as bar(i INT, t TEXT, b BOOl)
+----
+project-set
+ ├── columns: i:7 t:8 b:9
+ ├── values
+ │    └── tuple
+ └── zip
+      └── udf: strict_fn_record
+           ├── strict
+           ├── params: i:1 t:2 b:3
+           ├── args
+           │    ├── const: 1
+           │    ├── const: 'foo'
+           │    └── false
+           └── body
+                └── limit
+                     ├── columns: i:4 t:5 b:6
+                     ├── project
+                     │    ├── columns: i:4 t:5 b:6
+                     │    ├── values
+                     │    │    └── tuple
+                     │    └── projections
+                     │         ├── variable: i:1 [as=i:4]
+                     │         ├── variable: t:2 [as=t:5]
+                     │         └── variable: b:3 [as=b:6]
+                     └── const: 1
+
+build format=show-scalars
+SELECT * FROM (SELECT strict_fn_record(1, 'foo', false)) as bar;
+----
+project
+ ├── columns: strict_fn_record:8
+ ├── values
+ │    └── tuple
+ └── projections
+      └── udf: strict_fn_record [as=strict_fn_record:8]
+           ├── strict
+           ├── params: i:1 t:2 b:3
+           ├── args
+           │    ├── const: 1
+           │    ├── const: 'foo'
+           │    └── false
+           └── body
+                └── project
+                     ├── columns: column7:7
+                     ├── limit
+                     │    ├── columns: i:4 t:5 b:6
+                     │    ├── project
+                     │    │    ├── columns: i:4 t:5 b:6
+                     │    │    ├── values
+                     │    │    │    └── tuple
+                     │    │    └── projections
+                     │    │         ├── variable: i:1 [as=i:4]
+                     │    │         ├── variable: t:2 [as=t:5]
+                     │    │         └── variable: b:3 [as=b:6]
+                     │    └── const: 1
+                     └── projections
+                          └── tuple [as=column7:7]
+                               ├── variable: i:4
+                               ├── variable: t:5
+                               └── variable: b:6
+
 
 # --------------------------------------------------
 # UDFs with * expressions.
diff --git a/pkg/sql/opt/optbuilder/util.go b/pkg/sql/opt/optbuilder/util.go
index b9755b7a66e9..4862e650ba6c 100644
--- a/pkg/sql/opt/optbuilder/util.go
+++ b/pkg/sql/opt/optbuilder/util.go
@@ -186,7 +186,7 @@ func (b *Builder) expandStarAndResolveType(
 // example, the query `SELECT (x + 1) AS "x_incr" FROM t` has a projection with
 // a synthesized column "x_incr".
 //
-// scope  The scope is passed in so it can can be updated with the newly bound
+// scope  The scope is passed in so it can be updated with the newly bound
 //
 //	variable.
 //
diff --git a/pkg/sql/routine.go b/pkg/sql/routine.go
index e8aad96d34a1..11a93dd5deb9 100644
--- a/pkg/sql/routine.go
+++ b/pkg/sql/routine.go
@@ -19,6 +19,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
 	"github.com/cockroachdb/cockroach/pkg/util/tracing"
+	"github.com/cockroachdb/errors"
 )
 
 // EvalRoutineExpr returns the result of evaluating the routine. It calls the
@@ -121,7 +122,17 @@ func (g *routineGenerator) ResolvedType() *types.T {
 // is cache-able (i.e., there are no arguments to the routine and stepping is
 // disabled).
 func (g *routineGenerator) Start(ctx context.Context, txn *kv.Txn) (err error) {
-	retTypes := []*types.T{g.expr.ResolvedType()}
+	rt := g.expr.ResolvedType()
+	var retTypes []*types.T
+	if g.expr.MultiColOutput {
+		// A routine with multiple output column should have its types in a tuple.
+		if rt.Family() != types.TupleFamily {
+			return errors.AssertionFailedf("routine expected to return multiple columns")
+		}
+		retTypes = rt.TupleContents()
+	} else {
+		retTypes = []*types.T{g.expr.ResolvedType()}
+	}
 	g.rch.Init(ctx, retTypes, g.p.ExtendedEvalContext(), "routine" /* opName */)
 
 	// Configure stepping for volatile routines so that mutations made by the
diff --git a/pkg/sql/rowexec/project_set.go b/pkg/sql/rowexec/project_set.go
index 2ab9619a342c..969f1b0ce6ce 100644
--- a/pkg/sql/rowexec/project_set.go
+++ b/pkg/sql/rowexec/project_set.go
@@ -181,6 +181,14 @@ func (ps *projectSetProcessor) nextInputRow() (
 				gen, err = eval.GetFuncGenerator(ps.Ctx(), ps.EvalCtx, t)
 			case *tree.RoutineExpr:
 				gen, err = eval.GetRoutineGenerator(ps.Ctx(), ps.EvalCtx, t)
+				if err == nil && gen == nil && t.MultiColOutput && !t.Generator {
+					// If the routine will return multiple output columns, we expect the
+					// routine to return nulls for each column type instead of no rows, so
+					// we can't use the empty generator. Set-returning routines
+					// (i.e., Generators), have different behavior and are handled
+					// separately.
+					gen, err = builtins.NullGenerator(t.ResolvedType())
+				}
 			default:
 				return nil, nil, errors.AssertionFailedf("unexpected expression in project-set: %T", fn)
 			}
diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go
index 9b52997b698a..89a47b3fceac 100644
--- a/pkg/sql/sem/builtins/generator_builtins.go
+++ b/pkg/sql/sem/builtins/generator_builtins.go
@@ -1254,6 +1254,20 @@ func EmptyGenerator() eval.ValueGenerator {
 	return &arrayValueGenerator{array: tree.NewDArray(types.Any)}
 }
 
+// NullGenerator returns a new generator that returns a single row of nulls
+// corresponding to the types stored in the tuple typ.
+func NullGenerator(typ *types.T) (eval.ValueGenerator, error) {
+	if typ.Family() != types.TupleFamily {
+		return nil, errors.AssertionFailedf("generator expected to return multiple columns")
+	}
+	arrs := make([]*tree.DArray, len(typ.TupleContents()))
+	for i := range typ.TupleContents() {
+		arrs[i] = &tree.DArray{}
+		arrs[i].Array = tree.Datums{tree.DNull}
+	}
+	return &multipleArrayValueGenerator{arrays: arrs}, nil
+}
+
 // unaryValueGenerator supports the execution of crdb_internal.unary_table().
 type unaryValueGenerator struct {
 	done bool
diff --git a/pkg/sql/sem/eval/generators.go b/pkg/sql/sem/eval/generators.go
index 7a03a5812d08..8d12ce397a55 100644
--- a/pkg/sql/sem/eval/generators.go
+++ b/pkg/sql/sem/eval/generators.go
@@ -53,7 +53,7 @@ func GetRoutineGenerator(
 			if args[i] == tree.DNull {
 				// Strict routines (CalledOnNullInput=false) should not be
 				// invoked if any of their arguments are NULL. Return nil so
-				// that the EmptyGenerator is used.
+				// that the EmptyGenerator or NullGenerator is used.
 				return nil, nil
 			}
 		}
diff --git a/pkg/sql/sem/tree/expr.go b/pkg/sql/sem/tree/expr.go
index c8d56c0cfec6..5c17e3d4b58c 100644
--- a/pkg/sql/sem/tree/expr.go
+++ b/pkg/sql/sem/tree/expr.go
@@ -1327,6 +1327,10 @@ func (node *FuncExpr) IsVectorizeStreaming() bool {
 	return node.fnProps != nil && node.fnProps.VectorizeStreaming
 }
 
+func (node *FuncExpr) SetTypeAnnotation(t *types.T) {
+	node.typ = t
+}
+
 type funcType int
 
 // FuncExpr.Type
diff --git a/pkg/sql/sem/tree/routine.go b/pkg/sql/sem/tree/routine.go
index ec44f45fa4ba..384883fb510c 100644
--- a/pkg/sql/sem/tree/routine.go
+++ b/pkg/sql/sem/tree/routine.go
@@ -105,6 +105,12 @@ type RoutineExpr struct {
 	// Strict non-set-returning routines are not invoked when their arguments
 	// are NULL because optbuilder wraps them in a CASE expressions.
 	CalledOnNullInput bool
+
+	// MultiColOutput is true if the function may return multiple columns.
+	MultiColOutput bool
+
+	// Generator is true if the function may output a set of rows.
+	Generator bool
 }
 
 // NewTypedRoutineExpr returns a new RoutineExpr that is well-typed.
@@ -115,6 +121,8 @@ func NewTypedRoutineExpr(
 	typ *types.T,
 	enableStepping bool,
 	calledOnNullInput bool,
+	multiColOutput bool,
+	generator bool,
 ) *RoutineExpr {
 	return &RoutineExpr{
 		Args:              args,
@@ -123,6 +131,8 @@ func NewTypedRoutineExpr(
 		EnableStepping:    enableStepping,
 		Name:              name,
 		CalledOnNullInput: calledOnNullInput,
+		MultiColOutput:    multiColOutput,
+		Generator:         generator,
 	}
 }
 
diff --git a/pkg/sql/sem/tree/type_check.go b/pkg/sql/sem/tree/type_check.go
index 85f4482a8051..fe99f42905b2 100644
--- a/pkg/sql/sem/tree/type_check.go
+++ b/pkg/sql/sem/tree/type_check.go
@@ -1082,6 +1082,7 @@ func (expr *FuncExpr) TypeCheck(
 		return nil, pgerror.Wrapf(err, pgcode.InvalidParameterValue, "%s()", def.Name)
 	}
 
+	var hasUDFOverload bool
 	var calledOnNullInputFns, notCalledOnNullInputFns intsets.Fast
 	for _, idx := range s.overloadIdxs {
 		if def.Overloads[idx].CalledOnNullInput {
@@ -1089,6 +1090,10 @@ func (expr *FuncExpr) TypeCheck(
 		} else {
 			notCalledOnNullInputFns.Add(int(idx))
 		}
+		// TODO(harding): Check if this is a record-returning UDF instead.
+		if def.Overloads[idx].IsUDF {
+			hasUDFOverload = true
+		}
 	}
 
 	// If the function is an aggregate that does not accept null arguments and we
@@ -1135,7 +1140,7 @@ func (expr *FuncExpr) TypeCheck(
 	// NULL arguments, the function isn't a generator or aggregate builtin, and
 	// NULL is given as an argument.
 	if len(s.overloadIdxs) > 0 && calledOnNullInputFns.Len() == 0 && funcCls != GeneratorClass &&
-		funcCls != AggregateClass {
+		funcCls != AggregateClass && !hasUDFOverload {
 		for _, expr := range s.typedExprs {
 			if expr.ResolvedType().Family() == types.UnknownFamily {
 				return DNull, nil