diff --git a/pkg/sql/logictest/testdata/logic_test/enums b/pkg/sql/logictest/testdata/logic_test/enums index bcfe55675a0f..446aa2c02acf 100644 --- a/pkg/sql/logictest/testdata/logic_test/enums +++ b/pkg/sql/logictest/testdata/logic_test/enums @@ -1768,7 +1768,7 @@ $$; query TT SELECT "🙏"('😊'), "🙏"(NULL:::"Emoji 😉") ---- -NULL NULL +NULL mixed statement ok CREATE DATABASE "DB➕➕"; diff --git a/pkg/sql/logictest/testdata/logic_test/udf_record b/pkg/sql/logictest/testdata/logic_test/udf_record index 10f4352acdc6..1a049daed562 100644 --- a/pkg/sql/logictest/testdata/logic_test/udf_record +++ b/pkg/sql/logictest/testdata/logic_test/udf_record @@ -13,15 +13,13 @@ SELECT f_one(); ---- (1) -# TODO(97059): The following query should require a column definition list. -statement ok +statement error pq: a column definition list is required for functions returning \"record\" SELECT * FROM f_one(); -# TODO(97059): The following query should produce a row, not a tuple. -query T +query I SELECT * FROM f_one() AS foo (a INT); ---- -(1) +1 statement ok CREATE FUNCTION f_const() RETURNS RECORD AS @@ -35,7 +33,7 @@ SELECT f_const(); (1,2.0,"welcome roacher","2021-07-12 17:02:10+00") statement ok -CREATE FUNCTION f_arr() RETURNS RECORD AS +CREATE FUNCTION f_arr() RETURNS RECORD STABLE AS $$ SELECT ARRAY[1, 2, 3]; $$ LANGUAGE SQL; @@ -92,7 +90,7 @@ SELECT f_table(); (1,5) statement ok -CREATE FUNCTION f_multitable() RETURNS RECORD AS +CREATE FUNCTION f_multitable() RETURNS RECORD STABLE AS $$ SELECT t1.*, t2.* FROM t as t1 JOIN t as t2 on t1.a = t2.a ORDER BY t1.a LIMIT 1; $$ LANGUAGE SQL; @@ -130,3 +128,235 @@ query T SELECT f_table(); ---- (1,5) + +subtest datasource + +statement ok +CREATE FUNCTION f_tup() RETURNS RECORD AS +$$ + SELECT ROW(1, 2, 3); +$$ LANGUAGE SQL; + +query T +SELECT f_tup(); +---- +(1,2,3) + +statement error pq: a column definition list is required for functions returning \"record\" +SELECT * FROM f_tup(); + +query III colnames +SELECT * FROM f_tup() as foo(a int, b int, c int); +---- +a b c +1 2 3 + +statement ok +CREATE FUNCTION f_col() RETURNS RECORD AS +$$ + SELECT 1, 2, 3; +$$ LANGUAGE SQL; + +query T +SELECT f_col(); +---- +(1,2,3) + +query III colnames +SELECT * FROM f_col() as foo(a int, b int, c int); +---- +a b c +1 2 3 + +statement ok +CREATE TABLE t_imp (a INT PRIMARY KEY, b INT); +INSERT INTO t_imp VALUES (1, 10), (2, 4), (3, 32); + +statement ok +CREATE FUNCTION f_imp() RETURNS t_imp AS +$$ + SELECT * FROM t_imp ORDER BY a LIMIT 1; +$$ LANGUAGE SQL; + +query II colnames +SELECT * FROM f_imp(); +---- +a b +1 10 + +statement ok +CREATE TYPE udt AS ENUM ('a', 'b', 'c'); + +statement ok +CREATE FUNCTION f_udt() RETURNS udt AS +$$ + SELECT 'a'::udt; +$$ LANGUAGE SQL; + +query T +SELECT * FROM f_udt(); +---- +a + +statement ok +CREATE FUNCTION f_udt_record() RETURNS RECORD AS +$$ + SELECT 'a'::udt; +$$ LANGUAGE SQL; + +query T +SELECT * FROM f_udt() AS foo(u udt); +---- +a + +query II rowsort +SELECT * FROM f_setof() AS foo(a INT, b INT); +---- +1 5 +2 6 +3 7 + +statement ok +CREATE FUNCTION f_setof_imp() RETURNS SETOF t_imp STABLE AS +$$ + SELECT * FROM t_imp; +$$ LANGUAGE SQL; + +query II rowsort +SELECT * FROM f_setof_imp() +---- +1 10 +2 4 +3 32 + +statement ok +CREATE FUNCTION f_strict() RETURNS RECORD STRICT AS +$$ + SELECT 1, 2, 3; +$$ LANGUAGE SQL; + +query III +SELECT * FROM f_strict() AS foo(a INT, b INT, c INT); +---- +1 2 3 + +statement ok +CREATE FUNCTION f_setof_strict() RETURNS SETOF RECORD STRICT STABLE AS +$$ + SELECT * FROM t_imp; +$$ LANGUAGE SQL; + +query II rowsort +SELECT * FROM f_setof_strict() AS foo(a INT, b INT); +---- +1 10 +2 4 +3 32 + +statement ok +CREATE FUNCTION f_strict_arg(IN a INT, IN b INT) RETURNS RECORD STRICT STABLE AS +$$ + SELECT a, b; +$$ LANGUAGE SQL; + +query II colnames +SELECT * FROM f_strict_arg(1,2) AS foo(a INT, b INT); +---- +a b +1 2 + +query II colnames +SELECT * FROM f_strict_arg(NULL, 2) AS foo(a INT, b INT); +---- +a b +NULL NULL + +statement ok +CREATE FUNCTION f_strict_arg_setof(IN a INT, IN b INT) RETURNS SETOF RECORD STRICT AS +$$ + SELECT a, b FROM generate_series(1,3); +$$ LANGUAGE SQL; + +query II colnames +SELECT * FROM f_strict_arg_setof(1,2) AS foo(a INT, b INT); +---- +a b +1 2 +1 2 +1 2 + +# Strict SETOF UDF with NULL input returns 0 rows. +query II colnames +SELECT * FROM f_strict_arg_setof(NULL,2) AS foo(a INT, b INT); +---- +a b + +statement ok +CREATE TABLE n (a INT PRIMARY KEY, b INT); +INSERT INTO n VALUES (1, 5), (2, NULL); + +query III colnames +WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg(narg.input, 2) AS foo(a INT, b INT); +---- +input a b +NULL NULL NULL + +query III colnames +WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg_SETOF(narg.input, 2) AS foo(a INT, b INT); +---- +input a b + +statement ok +CREATE FUNCTION f_arg(IN a INT8, IN b INT8) RETURNS RECORD AS +$$ + SELECT a, b; +$$ LANGUAGE SQL; + +query II +SELECT * FROM f_arg(1,2) AS foo(a INT, b INT); +---- +1 2 + +# Test ambiguous function signatures with records +subtest ambiguity + +# setof +statement ok +CREATE FUNCTION f_amb_setof(a INT8, b INT8) RETURNS SETOF RECORD STRICT AS +$$ + SELECT a, b; +$$ LANGUAGE SQL; + +statement ok +CREATE FUNCTION f_amb_setof(a INT8, b STRING) RETURNS SETOF RECORD STRICT AS +$$ + SELECT a, b; +$$ LANGUAGE SQL; + +# TODO(harding): In postgres, calls to f_amb_setof should succeed and return 0 rows. +statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are +SELECT f_amb_setof(1, NULL); + +statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are +SELECT * FROM f_amb_setof(1, NULL) as foo(a int, b int); + +statement ok +CREATE FUNCTION f_amb(a INT, b INT) RETURNS RECORD STRICT AS +$$ + SELECT a, b; +$$ LANGUAGE SQL; + +statement ok +CREATE FUNCTION f_amb(a INT, b STRING) RETURNS RECORD STRICT AS +$$ + SELECT a, b; +$$ LANGUAGE SQL; + +# TODO(harding): In postgres, calls to f_amb should succeed and return NULL. +statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are +SELECT f_amb(1, NULL); + +# TODO(harding): In postgres, calls to f_amb as a data source should succeed +# and return NULL NULL. +statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are +SELECT * FROM f_amb(1, NULL) as foo(a int, b int); diff --git a/pkg/sql/logictest/testdata/logic_test/udf_setof b/pkg/sql/logictest/testdata/logic_test/udf_setof index 85a67877da44..5140b407f7ff 100644 --- a/pkg/sql/logictest/testdata/logic_test/udf_setof +++ b/pkg/sql/logictest/testdata/logic_test/udf_setof @@ -166,25 +166,36 @@ CREATE FUNCTION all_ab() RETURNS SETOF ab LANGUAGE SQL AS $$ SELECT a, b FROM ab $$ -# TODO(mgartner): This should return separate columns, not a tuple. See #97059. -query T rowsort +query II rowsort SELECT * FROM all_ab() ---- -(1,10) -(2,20) -(3,30) -(4,40) +1 10 +2 20 +3 30 +4 40 statement ok CREATE FUNCTION all_ab_tuple() RETURNS SETOF ab LANGUAGE SQL AS $$ SELECT (a, b) FROM ab $$ -# TODO(mgartner): This should return separate columns, not a tuple. See #97059. -query T rowsort +query II rowsort +SELECT * FROM all_ab_tuple() +---- +1 10 +2 20 +3 30 +4 40 + +statement ok +CREATE FUNCTION all_ab_record() RETURNS SETOF RECORD LANGUAGE SQL AS $$ + SELECT a, b FROM ab +$$ + +query II rowsort SELECT * FROM all_ab_tuple() ---- -(1,10) -(2,20) -(3,30) -(4,40) +1 10 +2 20 +3 30 +4 40 diff --git a/pkg/sql/opt/exec/execbuilder/scalar.go b/pkg/sql/opt/exec/execbuilder/scalar.go index d813ef0322e8..8e764aff8063 100644 --- a/pkg/sql/opt/exec/execbuilder/scalar.go +++ b/pkg/sql/opt/exec/execbuilder/scalar.go @@ -691,6 +691,8 @@ func (b *Builder) buildExistsSubquery( types.Bool, false, /* enableStepping */ true, /* calledOnNullInput */ + false, /* multiColOutput */ + false, /* generator */ ), tree.DBoolFalse, }, types.Bool), nil @@ -779,6 +781,8 @@ func (b *Builder) buildSubquery( subquery.Typ, false, /* enableStepping */ true, /* calledOnNullInput */ + false, /* multiColOutput */ + false, /* generator */ ), nil } @@ -831,6 +835,8 @@ func (b *Builder) buildSubquery( subquery.Typ, false, /* enableStepping */ true, /* calledOnNullInput */ + false, /* multiColOutput */ + false, /* generator */ ), nil } @@ -913,6 +919,8 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ udf.Typ, enableStepping, udf.CalledOnNullInput, + udf.MultiColOutput, + udf.SetReturning, ), nil } diff --git a/pkg/sql/opt/norm/inline_funcs.go b/pkg/sql/opt/norm/inline_funcs.go index 9d5f02bd2795..c276d9f0fc34 100644 --- a/pkg/sql/opt/norm/inline_funcs.go +++ b/pkg/sql/opt/norm/inline_funcs.go @@ -412,11 +412,15 @@ func (c *CustomFuncs) InlineConstVar(f memo.FiltersExpr) memo.FiltersExpr { // 2. It has a single statement. // 3. Its arguments are non-volatile expressions. // 4. It is not a set-returning function. +// 5. It is not a record-returning function. // // UDFs with mutations (INSERT, UPDATE, UPSERT, DELETE) cannot be inlined, but // we do not need an explicit check for this because immutable UDFs cannot // contain mutations. // +// We cannot inline record-returning functions, because subqueries can only +// return a single column. +// // TODO(mgartner): We may be able to loosen (1) and (3). Subqueries are always // evaluated just once, so by converting a UDF to a subquery we effectively make // it and it's arguments non-volatile. So, if UDFs can be inlined in some other @@ -428,8 +432,11 @@ func (c *CustomFuncs) InlineConstVar(f memo.FiltersExpr) memo.FiltersExpr { // strict UDF that is not called when an argument is NULL. This presents a // challenge because we cannot wrap a set-returning function in a CASE // expression, like we do for strict, non-set-returning functions. +// +// TODO(harding): We could potentially loosen (5), since only record-returning +// UDFs used as data sources return multiple columns. func (c *CustomFuncs) IsInlinableUDF(args memo.ScalarListExpr, udfp *memo.UDFPrivate) bool { - if udfp.Volatility == volatility.Volatile || len(udfp.Body) != 1 || udfp.SetReturning { + if udfp.Volatility == volatility.Volatile || len(udfp.Body) != 1 || udfp.SetReturning || udfp.MultiColOutput { return false } for i := range args { diff --git a/pkg/sql/opt/norm/testdata/rules/udf b/pkg/sql/opt/norm/testdata/rules/udf index f73ced1db00b..310b0d453928 100644 --- a/pkg/sql/opt/norm/testdata/rules/udf +++ b/pkg/sql/opt/norm/testdata/rules/udf @@ -33,10 +33,10 @@ norm format=show-scalars SELECT strict_fn(1, 'foo', NULL) ---- values - ├── columns: strict_fn:1 + ├── columns: strict_fn:5 ├── cardinality: [1 - 1] ├── key: () - ├── fd: ()-->(1) + ├── fd: ()-->(5) └── tuple └── null diff --git a/pkg/sql/opt/ops/scalar.opt b/pkg/sql/opt/ops/scalar.opt index eb39882cee14..6902be7081ea 100644 --- a/pkg/sql/opt/ops/scalar.opt +++ b/pkg/sql/opt/ops/scalar.opt @@ -1282,6 +1282,9 @@ define UDFPrivate { # operators. Non-scalar UDFs are always in a project-set operator, while # scalar UDFs can be if used as a data source (e.g. SELECT * FROM udf()). CalledOnNullInput bool + + # MultiColOutput is true if the function may return multiple columns. + MultiColOutput bool } # KVOptions is a set of KVOptionItems that specify arbitrary keys and values diff --git a/pkg/sql/opt/optbuilder/builder.go b/pkg/sql/opt/optbuilder/builder.go index 61f5ec53978f..835085027b8e 100644 --- a/pkg/sql/opt/optbuilder/builder.go +++ b/pkg/sql/opt/optbuilder/builder.go @@ -137,6 +137,9 @@ type Builder struct { // within. insideUDF bool + // insideDataSource is true when we are processing a data source. + insideDataSource bool + // If set, we are collecting view dependencies in schemaDeps. This can only // happen inside view/function definitions. // diff --git a/pkg/sql/opt/optbuilder/scalar.go b/pkg/sql/opt/optbuilder/scalar.go index bc89a9c5981c..6ba3e7a57b80 100644 --- a/pkg/sql/opt/optbuilder/scalar.go +++ b/pkg/sql/opt/optbuilder/scalar.go @@ -695,6 +695,7 @@ func (b *Builder) buildUDF( // We'll need to track the depth of the UDFs we are building expressions // within. b.insideUDF = true + isMultiColOutput := false for i := range stmts { stmtScope := b.buildStmt(stmts[i].AST, nil /* desiredTypes */, bodyScope) expr := stmtScope.expr @@ -715,34 +716,69 @@ func (b *Builder) buildUDF( physProps.Ordering = props.OrderingChoice{} } - // Replace the tuple contents of RECORD return types from Any to the - // result columns of the last statement. If the result column is a tuple, - // then use its tuple contents for the return instead. + // If returning a RECORD type, the function return type needs to be + // modified because when we first parse the CREATE FUNCTION, the RECORD + // is represented as a tuple with any types and execution requires the + // types to be concrete in order to decode them correctly. We can + // determine the types from the result columns or tuple of the last + // statement. isSingleTupleResult := len(stmtScope.cols) == 1 && stmtScope.cols[0].typ.Family() == types.TupleFamily - if types.IsRecordType(f.ResolvedType()) { + if types.IsRecordType(rtyp) { if isSingleTupleResult { - f.ResolvedType().InternalType.TupleContents = stmtScope.cols[0].typ.TupleContents() + // When the final statement returns a single tuple, we can use the + // tuple's types as the function return type. + rtyp = stmtScope.cols[0].typ } else { + // Get the types from the individual columns of the last statement. tc := make([]*types.T, len(stmtScope.cols)) + tl := make([]string, len(stmtScope.cols)) for i, col := range stmtScope.cols { tc[i] = col.typ + tl[i] = col.name.MetadataName() } - f.ResolvedType().InternalType.TupleContents = tc + rtyp = types.MakeLabeledTuple(tc, tl) } + f.SetTypeAnnotation(rtyp) } - // If there are multiple output columns or the output type is a record and - // the output column is not a tuple, we must combine them into a tuple - - // only a single column can be returned from a UDF. + // Only a single column can be returned from a UDF, unless it is used as a + // data source. Data sources may output multiple columns, and if the + // statement body produces a tuple it needs to be expanded into columns. + // When not used as a data source, combine statements producing multiple + // columns into a tuple. If the last statement is already returning a + // tuple and the function has a record return type, then we do not need to + // wrap the output in another tuple. cols := physProps.Presentation - if len(cols) > 1 || (types.IsRecordType(f.ResolvedType()) && !isSingleTupleResult) { + if b.insideDataSource && rtyp.Family() == types.TupleFamily { + // When the UDF is used as a data source and expects to output a tuple + // type, its output needs to be a row of columns instead of the usual + // tuple. If the last statement output a tuple, we need to expand the + // tuple into individual columns. + isMultiColOutput = true + if isSingleTupleResult { + stmtScope = bodyScope.push() + elems := make([]scopeColumn, len(rtyp.TupleContents())) + for i := range rtyp.TupleContents() { + e := b.factory.ConstructColumnAccess(b.factory.ConstructVariable(cols[0].ID), memo.TupleOrdinal(i)) + col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp.TupleContents()[i], nil, e) + elems[i] = *col + } + expr = b.constructProject(expr, elems) + physProps = stmtScope.makePhysicalProps() + } + } else if len(cols) > 1 || (types.IsRecordType(rtyp) && !isSingleTupleResult) { + // Only a single column can be returned from a UDF, unless it is used as a + // data source (see comment above). If there are multiple columns, combine + // them into a tuple. If the last statement is already returning a tuple + // and the function has a record return type, then do not wrap the + // output in another tuple. elems := make(memo.ScalarListExpr, len(cols)) for i := range cols { elems[i] = b.factory.ConstructVariable(cols[i].ID) } - tup := b.factory.ConstructTuple(elems, f.ResolvedType()) + tup := b.factory.ConstructTuple(elems, rtyp) stmtScope = bodyScope.push() - col := b.synthesizeColumn(stmtScope, scopeColName(""), f.ResolvedType(), nil /* expr */, tup) + col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp, nil /* expr */, tup) expr = b.constructProject(expr, []scopeColumn{*col}) physProps = stmtScope.makePhysicalProps() } @@ -755,17 +791,17 @@ func (b *Builder) buildUDF( // column is already a tuple. returnCol := physProps.Presentation[0].ID returnColMeta := b.factory.Metadata().ColumnMeta(returnCol) - if !types.IsRecordType(f.ResolvedType()) && !returnColMeta.Type.Identical(f.ResolvedType()) { - if !cast.ValidCast(returnColMeta.Type, f.ResolvedType(), cast.ContextAssignment) { + if !types.IsRecordType(rtyp) && !isMultiColOutput && !returnColMeta.Type.Identical(rtyp) { + if !cast.ValidCast(returnColMeta.Type, rtyp, cast.ContextAssignment) { panic(sqlerrors.NewInvalidAssignmentCastError( - returnColMeta.Type, f.ResolvedType(), returnColMeta.Alias)) + returnColMeta.Type, rtyp, returnColMeta.Alias)) } cast := b.factory.ConstructAssignmentCast( b.factory.ConstructVariable(physProps.Presentation[0].ID), - f.ResolvedType(), + rtyp, ) stmtScope = bodyScope.push() - col := b.synthesizeColumn(stmtScope, scopeColName(""), f.ResolvedType(), nil /* expr */, cast) + col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp, nil /* expr */, cast) expr = b.constructProject(expr, []scopeColumn{*col}) physProps = stmtScope.makePhysicalProps() } @@ -788,6 +824,7 @@ func (b *Builder) buildUDF( SetReturning: isSetReturning, Volatility: o.Volatility, CalledOnNullInput: o.CalledOnNullInput, + MultiColOutput: isMultiColOutput, }, ) @@ -799,7 +836,7 @@ func (b *Builder) buildUDF( // // For strict, set-returning UDFs, the evaluation logic achieves this // behavior. - if !isSetReturning && !o.CalledOnNullInput && len(args) > 0 { + if !isSetReturning && !isMultiColOutput && !o.CalledOnNullInput && len(args) > 0 { var anyArgIsNull opt.ScalarExpr for i := range args { // Note: We do NOT use a TupleIsNullExpr here if the argument is a @@ -829,10 +866,18 @@ func (b *Builder) buildUDF( ) } - // Synthesize an output column for set-returning UDFs. - if isSetReturning && outCol == nil { - outCol = b.synthesizeColumn(outScope, scopeColName(""), f.ResolvedType(), nil /* expr */, out) + // Synthesize an output columns if necessary. + if outCol == nil { + if isMultiColOutput { + // TODO(harding): Add the returns record property during create function. + f.ResolvedOverload().ReturnsRecordType = types.IsRecordType(rtyp) + return b.finishBuildGeneratorFunction(f, f.ResolvedOverload(), out, inScope, outScope, outCol) + } + if outScope != nil { + outCol = b.synthesizeColumn(outScope, scopeColName(""), f.ResolvedType(), nil /* expr */, out) + } } + return b.finishBuildScalar(f, out, inScope, outScope, outCol) } diff --git a/pkg/sql/opt/optbuilder/select.go b/pkg/sql/opt/optbuilder/select.go index 0325ce381ad0..a8a2e2fc9c77 100644 --- a/pkg/sql/opt/optbuilder/select.go +++ b/pkg/sql/opt/optbuilder/select.go @@ -49,10 +49,12 @@ import ( func (b *Builder) buildDataSource( texpr tree.TableExpr, indexFlags *tree.IndexFlags, locking lockingSpec, inScope *scope, ) (outScope *scope) { - defer func(prevAtRoot bool) { + defer func(prevAtRoot bool, prevInsideDataSource bool) { inScope.atRoot = prevAtRoot - }(inScope.atRoot) + b.insideDataSource = prevInsideDataSource + }(inScope.atRoot, b.insideDataSource) inScope.atRoot = false + b.insideDataSource = true // NB: The case statements are sorted lexicographically. switch source := (texpr).(type) { case *tree.AliasedTableExpr: diff --git a/pkg/sql/opt/optbuilder/srfs.go b/pkg/sql/opt/optbuilder/srfs.go index a93cdc802304..039e79ab5959 100644 --- a/pkg/sql/opt/optbuilder/srfs.go +++ b/pkg/sql/opt/optbuilder/srfs.go @@ -111,7 +111,9 @@ func (b *Builder) buildZip(exprs tree.Exprs, inScope *scope) (outScope *scope) { var outCol *scopeColumn startCols := len(outScope.cols) - if def == nil || funcExpr.ResolvedOverload().Class != tree.GeneratorClass || b.shouldCreateDefaultColumn(texpr) { + isRecordReturningUDF := def != nil && funcExpr.ResolvedOverload().IsUDF && texpr.ResolvedType().Family() == types.TupleFamily && b.insideDataSource + + if def == nil || (funcExpr.ResolvedOverload().Class != tree.GeneratorClass && !isRecordReturningUDF) || (b.shouldCreateDefaultColumn(texpr) && !isRecordReturningUDF) { if def != nil && len(funcExpr.ResolvedOverload().ReturnLabels) > 0 { // Override the computed alias with the one defined in the ReturnLabels. This // satisfies a Postgres quirk where some json functions use different labels diff --git a/pkg/sql/opt/optbuilder/testdata/udf b/pkg/sql/opt/optbuilder/testdata/udf index 38aa37f6910d..f87e8abb1e99 100644 --- a/pkg/sql/opt/optbuilder/testdata/udf +++ b/pkg/sql/opt/optbuilder/testdata/udf @@ -1167,6 +1167,36 @@ project │ └── variable: i:10 [as=i:13] └── const: 1 +exec-ddl +CREATE FUNCTION strict_fn_record(i INT, t TEXT, b BOOl) RETURNS RECORD STRICT LANGUAGE SQL AS 'SELECT i, t, b' +---- + +build format=show-scalars +SELECT * FROM strict_fn_record(1, 'foo', false) as bar(i INT, t TEXT, b BOOl) +---- +project-set + ├── columns: i:7 t:8 b:9 + ├── values + │ └── tuple + └── zip + └── udf: strict_fn_record + ├── params: i:1 t:2 b:3 + ├── args + │ ├── const: 1 + │ ├── const: 'foo' + │ └── false + └── body + └── limit + ├── columns: i:4 t:5 b:6 + ├── project + │ ├── columns: i:4 t:5 b:6 + │ ├── values + │ │ └── tuple + │ └── projections + │ ├── variable: i:1 [as=i:4] + │ ├── variable: t:2 [as=t:5] + │ └── variable: b:3 [as=b:6] + └── const: 1 # -------------------------------------------------- # UDFs with * expressions. diff --git a/pkg/sql/opt/optbuilder/util.go b/pkg/sql/opt/optbuilder/util.go index b9755b7a66e9..4862e650ba6c 100644 --- a/pkg/sql/opt/optbuilder/util.go +++ b/pkg/sql/opt/optbuilder/util.go @@ -186,7 +186,7 @@ func (b *Builder) expandStarAndResolveType( // example, the query `SELECT (x + 1) AS "x_incr" FROM t` has a projection with // a synthesized column "x_incr". // -// scope The scope is passed in so it can can be updated with the newly bound +// scope The scope is passed in so it can be updated with the newly bound // // variable. // diff --git a/pkg/sql/routine.go b/pkg/sql/routine.go index 873b662a5bee..0beb1991e67e 100644 --- a/pkg/sql/routine.go +++ b/pkg/sql/routine.go @@ -19,6 +19,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/tracing" + "github.com/cockroachdb/errors" ) // EvalRoutineExpr returns the result of evaluating the routine. It calls the @@ -111,7 +112,17 @@ func (g *routineGenerator) ResolvedType() *types.T { // is cache-able (i.e., there are no arguments to the routine and stepping is // disabled). func (g *routineGenerator) Start(ctx context.Context, txn *kv.Txn) (err error) { - retTypes := []*types.T{g.expr.ResolvedType()} + rt := g.expr.ResolvedType() + var retTypes []*types.T + if g.expr.MultiColOutput { + // A routine with multiple output column should have its types in a tuple. + if rt.Family() != types.TupleFamily { + return errors.AssertionFailedf("routine expected to return multiple columns") + } + retTypes = rt.TupleContents() + } else { + retTypes = []*types.T{g.expr.ResolvedType()} + } g.rch.Init(ctx, retTypes, g.p.ExtendedEvalContext(), "routine" /* opName */) // Configure stepping for volatile routines so that mutations made by the diff --git a/pkg/sql/rowexec/project_set.go b/pkg/sql/rowexec/project_set.go index 2ab9619a342c..969f1b0ce6ce 100644 --- a/pkg/sql/rowexec/project_set.go +++ b/pkg/sql/rowexec/project_set.go @@ -181,6 +181,14 @@ func (ps *projectSetProcessor) nextInputRow() ( gen, err = eval.GetFuncGenerator(ps.Ctx(), ps.EvalCtx, t) case *tree.RoutineExpr: gen, err = eval.GetRoutineGenerator(ps.Ctx(), ps.EvalCtx, t) + if err == nil && gen == nil && t.MultiColOutput && !t.Generator { + // If the routine will return multiple output columns, we expect the + // routine to return nulls for each column type instead of no rows, so + // we can't use the empty generator. Set-returning routines + // (i.e., Generators), have different behavior and are handled + // separately. + gen, err = builtins.NullGenerator(t.ResolvedType()) + } default: return nil, nil, errors.AssertionFailedf("unexpected expression in project-set: %T", fn) } diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go index 39a1be8ff685..0790d9e61aeb 100644 --- a/pkg/sql/sem/builtins/generator_builtins.go +++ b/pkg/sql/sem/builtins/generator_builtins.go @@ -1254,6 +1254,20 @@ func EmptyGenerator() eval.ValueGenerator { return &arrayValueGenerator{array: tree.NewDArray(types.Any)} } +// NullGenerator returns a new generator that returns a single row of nulls +// corresponding to the types stored in the tuple typ. +func NullGenerator(typ *types.T) (eval.ValueGenerator, error) { + if typ.Family() != types.TupleFamily { + return nil, errors.AssertionFailedf("generator expected to return multiple columns") + } + arrs := make([]*tree.DArray, len(typ.TupleContents())) + for i := range typ.TupleContents() { + arrs[i] = &tree.DArray{} + arrs[i].Array = tree.Datums{tree.DNull} + } + return &multipleArrayValueGenerator{arrays: arrs}, nil +} + // unaryValueGenerator supports the execution of crdb_internal.unary_table(). type unaryValueGenerator struct { done bool diff --git a/pkg/sql/sem/tree/expr.go b/pkg/sql/sem/tree/expr.go index c8d56c0cfec6..5c17e3d4b58c 100644 --- a/pkg/sql/sem/tree/expr.go +++ b/pkg/sql/sem/tree/expr.go @@ -1327,6 +1327,10 @@ func (node *FuncExpr) IsVectorizeStreaming() bool { return node.fnProps != nil && node.fnProps.VectorizeStreaming } +func (node *FuncExpr) SetTypeAnnotation(t *types.T) { + node.typ = t +} + type funcType int // FuncExpr.Type diff --git a/pkg/sql/sem/tree/routine.go b/pkg/sql/sem/tree/routine.go index ec44f45fa4ba..384883fb510c 100644 --- a/pkg/sql/sem/tree/routine.go +++ b/pkg/sql/sem/tree/routine.go @@ -105,6 +105,12 @@ type RoutineExpr struct { // Strict non-set-returning routines are not invoked when their arguments // are NULL because optbuilder wraps them in a CASE expressions. CalledOnNullInput bool + + // MultiColOutput is true if the function may return multiple columns. + MultiColOutput bool + + // Generator is true if the function may output a set of rows. + Generator bool } // NewTypedRoutineExpr returns a new RoutineExpr that is well-typed. @@ -115,6 +121,8 @@ func NewTypedRoutineExpr( typ *types.T, enableStepping bool, calledOnNullInput bool, + multiColOutput bool, + generator bool, ) *RoutineExpr { return &RoutineExpr{ Args: args, @@ -123,6 +131,8 @@ func NewTypedRoutineExpr( EnableStepping: enableStepping, Name: name, CalledOnNullInput: calledOnNullInput, + MultiColOutput: multiColOutput, + Generator: generator, } } diff --git a/pkg/sql/sem/tree/type_check.go b/pkg/sql/sem/tree/type_check.go index 85f4482a8051..fe99f42905b2 100644 --- a/pkg/sql/sem/tree/type_check.go +++ b/pkg/sql/sem/tree/type_check.go @@ -1082,6 +1082,7 @@ func (expr *FuncExpr) TypeCheck( return nil, pgerror.Wrapf(err, pgcode.InvalidParameterValue, "%s()", def.Name) } + var hasUDFOverload bool var calledOnNullInputFns, notCalledOnNullInputFns intsets.Fast for _, idx := range s.overloadIdxs { if def.Overloads[idx].CalledOnNullInput { @@ -1089,6 +1090,10 @@ func (expr *FuncExpr) TypeCheck( } else { notCalledOnNullInputFns.Add(int(idx)) } + // TODO(harding): Check if this is a record-returning UDF instead. + if def.Overloads[idx].IsUDF { + hasUDFOverload = true + } } // If the function is an aggregate that does not accept null arguments and we @@ -1135,7 +1140,7 @@ func (expr *FuncExpr) TypeCheck( // NULL arguments, the function isn't a generator or aggregate builtin, and // NULL is given as an argument. if len(s.overloadIdxs) > 0 && calledOnNullInputFns.Len() == 0 && funcCls != GeneratorClass && - funcCls != AggregateClass { + funcCls != AggregateClass && !hasUDFOverload { for _, expr := range s.typedExprs { if expr.ResolvedType().Family() == types.UnknownFamily { return DNull, nil