Skip to content

Commit

Permalink
sql: support array-flatten subqueries within UDFs
Browse files Browse the repository at this point in the history
Array-flatten subqueries (e.g., `ARRAY(SELECT a FROM t)`) are now
supported within UDFs. They are now converted to a normal subquery with
a ScalarGroupBy if they exist within a UDF, even if they are
uncorrelated. This allows them to be executed without any changes to the
execbuilder or the evaluation logic of `tree.Routine`.

Fixes #98738

Release note: None
  • Loading branch information
mgartner committed Mar 20, 2023
1 parent ea389ff commit a8a3b75
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 26 deletions.
28 changes: 24 additions & 4 deletions pkg/sql/logictest/testdata/logic_test/udf
Original file line number Diff line number Diff line change
Expand Up @@ -2945,6 +2945,26 @@ SELECT all_fn(1), all_fn(2), all_fn(NULL::INT)
NULL false NULL


subtest array_flatten

statement ok
CREATE FUNCTION arr(x INT) RETURNS INT[] LANGUAGE SQL AS $$
SELECT ARRAY(VALUES (1), (2), (x));
$$

query T
SELECT arr(10)
----
{1,2,10}

query T
SELECT arr(i) FROM generate_series(1, 3) g(i)
----
{1,2,1}
{1,2,2}
{1,2,3}


subtest variadic

# Variadic UDFS are not currently supported.
Expand Down Expand Up @@ -3054,10 +3074,10 @@ SELECT oid, proname, pronamespace, proowner, prolang, proleakproof, proisstrict,
FROM pg_catalog.pg_proc WHERE proname IN ('f_93314', 'f_93314_alias', 'f_93314_comp', 'f_93314_comp_t')
ORDER BY oid;
----
100271 f_93314 105 1546506610 14 false false false v 0 100270 · {} NULL SELECT i, e FROM test.public.t_93314 ORDER BY i LIMIT 1;
100273 f_93314_alias 105 1546506610 14 false false false v 0 100272 · {} NULL SELECT i, e FROM test.public.t_93314_alias ORDER BY i LIMIT 1;
100277 f_93314_comp 105 1546506610 14 false false false v 0 100274 · {} NULL SELECT (1, 2);
100278 f_93314_comp_t 105 1546506610 14 false false false v 0 100276 · {} NULL SELECT a, c FROM test.public.t_93314_comp LIMIT 1;
100272 f_93314 105 1546506610 14 false false false v 0 100271 · {} NULL SELECT i, e FROM test.public.t_93314 ORDER BY i LIMIT 1;
100274 f_93314_alias 105 1546506610 14 false false false v 0 100273 · {} NULL SELECT i, e FROM test.public.t_93314_alias ORDER BY i LIMIT 1;
100278 f_93314_comp 105 1546506610 14 false false false v 0 100275 · {} NULL SELECT (1, 2);
100279 f_93314_comp_t 105 1546506610 14 false false false v 0 100277 · {} NULL SELECT a, c FROM test.public.t_93314_comp LIMIT 1;

# Regression test for #95240. Strict UDFs that are inlined should result in NULL
# when presented with NULL arguments.
Expand Down
14 changes: 10 additions & 4 deletions pkg/sql/opt/exec/execbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,16 @@ func (b *Builder) buildArrayFlatten(
panic(errors.AssertionFailedf("input to ArrayFlatten should be uncorrelated"))
}

if b.planLazySubqueries {
// The NormalizeArrayFlattenToAgg rule should have converted an
// ArrayFlatten within a UDF into an aggregation.
// We don't yet convert an ArrayFlatten within a correlated subquery
// into an aggregation, so we return a decorrelation error.
// TODO(mgartner): Build an ArrayFlatten within a correlated subquery as
// a Routine, or apply NormalizeArrayFlattenToAgg to all ArrayFlattens.
return nil, b.decorrelationError()
}

root, err := b.buildRelational(af.Input)
if err != nil {
return nil, err
Expand Down Expand Up @@ -762,10 +772,6 @@ func (b *Builder) buildSubquery(
// because we don't need to optimize the subquery input any further.
// It's already been fully optimized because it is uncorrelated and has
// no outer columns.
//
// TODO(mgartner): Uncorrelated subqueries only need to be evaluated
// once. We should cache their result to avoid all this overhead for
// every invocation.
inputRowCount := int64(input.Relational().Statistics().RowCountIfAvailable())
withExprs := make([]builtWithExpr, len(b.withExprs))
copy(withExprs, b.withExprs)
Expand Down
7 changes: 7 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -562,3 +562,10 @@ vectorized: true
estimated row count: 1,000 (missing stats)
table: corr@corr_pkey
spans: FULL SCAN

# Case where a correlated subquery contains an uncorrelated array-flatten
# subquery.
statement error could not decorrelate subquery
SELECT
CASE WHEN k < 5 THEN (SELECT array(SELECT 1) FROM corr tmp WHERE k*10 = corr.k) END
FROM corr
24 changes: 24 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/udf
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,30 @@ SELECT sub_fn4() FROM generate_series(1, 3)
1
1

statement ok
CREATE FUNCTION arr() RETURNS INT[] LANGUAGE SQL AS $$
SELECT ARRAY(VALUES (1), (2));
$$

# A query with a uncorrelated array-flatten within a UDF.
query T
EXPLAIN (VERBOSE) SELECT arr() FROM generate_series(1, 3)
----
distribution: local
vectorized: true
·
• render
│ columns: (arr)
│ render arr: arr()
└── • project set
│ columns: (generate_series)
│ estimated row count: 10
│ render 0: generate_series(1, 3)
└── • emptyrow
columns: ()


subtest regressions

Expand Down
18 changes: 8 additions & 10 deletions pkg/sql/opt/norm/rules/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -383,15 +383,13 @@ $input
# an aggregation). So it's desirable to perform this conversion in the
# interest of decorrelation.
#
# So the outcome is that we can perform uncorrelated ARRAY(...)s over any datatype,
# and correlated ones only over the types that array_agg supports.
#
# Note that optbuilder should have already verified that if the input is
# correlated, then we can array_agg over the input type. Also note that the
# Max1Row operator we introduce is guaranteed to be eliminated as
# MakeArrayAggForFlatten will return a ScalarGroupBy.
# So the outcome is that we can perform uncorrelated ARRAY(...)s over any
# datatype, and correlated ones only over the types that array_agg supports.
[NormalizeArrayFlattenToAgg, Normalize]
(ArrayFlatten $input:(HasOuterCols $input) $subquery:*)
(ArrayFlatten
$input:*
$private:* & (CanNormalizeArrayFlatten $input $private)
)
=>
(Coalesce
[
Expand All @@ -403,7 +401,7 @@ $input
(ArrayAgg
(Variable
$requestedCol:(SubqueryRequestedCol
$subquery
$private
)
)
)
Expand All @@ -414,7 +412,7 @@ $input
]
(MakeGrouping
(MakeEmptyColSet)
(SubqueryOrdering $subquery)
(SubqueryOrdering $private)
)
)
(MakeUnorderedSubquery)
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/norm/scalar_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,9 @@ func (c *CustomFuncs) SplitTupleEq(lhs, rhs *memo.TupleExpr) memo.FiltersExpr {
}
return res
}

// CanNormalizeArrayFlatten returns true if the input is correlated or if the
// ArrayFlatten exists within a UDF.
func (c *CustomFuncs) CanNormalizeArrayFlatten(input memo.RelExpr, p *memo.SubqueryPrivate) bool {
return c.HasOuterCols(input) || p.WithinUDF
}
44 changes: 44 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/scalar
Original file line number Diff line number Diff line change
Expand Up @@ -1985,6 +1985,50 @@ project
├── columns: k:8!null
└── key: (8)

exec-ddl
CREATE FUNCTION arr() RETURNS INT[] LANGUAGE SQL AS $$
SELECT ARRAY(VALUES (1), (2));
$$
----

# Should trigger for uncorrelated ArrayFlatten subqueries within a UDF
norm expect=NormalizeArrayFlattenToAgg format=show-scalars
SELECT arr()
----
values
├── columns: arr:4
├── cardinality: [1 - 1]
├── volatile
├── key: ()
├── fd: ()-->(4)
└── tuple
└── udf: arr
└── body
└── values
├── columns: array:3
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(3)
└── tuple
└── coalesce
├── subquery
│ └── scalar-group-by
│ ├── columns: array_agg:2
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ ├── fd: ()-->(2)
│ ├── values
│ │ ├── columns: column1:1!null
│ │ ├── cardinality: [2 - 2]
│ │ ├── tuple
│ │ │ └── const: 1
│ │ └── tuple
│ │ └── const: 2
│ └── aggregations
│ └── array-agg [as=array_agg:2, outer=(1)]
│ └── variable: column1:1
└── const: ARRAY[]

exec-ddl
CREATE TABLE pg_class (
oid OID NULL,
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/opt/ops/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ define SubqueryPrivate {
# will eventually be output. It is only used for ArrayFlatten expressions.
RequestedCol ColumnID

# WithinUDF is set to true if the subquery exists inside a UDFExpr. It is
# only used for ArrayFlatten expressions.
WithinUDF bool

# Cmp is only used for AnyOp.
Cmp Operator

Expand Down
8 changes: 1 addition & 7 deletions pkg/sql/opt/optbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,7 @@ func (b *Builder) buildScalar(

inCol := s.cols[0].id

// This looks kind of arbitrary and strange, because it is:
// We cannot array_agg over some types, but we can only decorrelate via array_agg.
// Thus, we reject a query that is correlated and over a type that we can't array_agg.
typ := b.factory.Metadata().ColumnMeta(inCol).Type
if !s.outerCols.Empty() && !memo.AggregateOverloadExists(opt.ArrayAggOp, typ) {
panic(unimplementedWithIssueDetailf(35710, "", "can't execute a correlated ARRAY(...) over %s", typ))
}

if err := types.CheckArrayElementType(typ); err != nil {
panic(err)
}
Expand All @@ -169,6 +162,7 @@ func (b *Builder) buildScalar(
OriginalExpr: s.Subquery,
Ordering: s.ordering,
RequestedCol: inCol,
WithinUDF: b.insideUDF,
}
out = b.factory.ConstructArrayFlatten(s.node, &subqueryPrivate)

Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/testdata/rules/cycle
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ memo (not optimized, ~6KB, required=[], cycle=[G1->G4->G6->G9->G10->G12->G13->G1
├── G7: (variable v)
├── G8: (const 1)
├── G9: (scalar-list G10 G11)
├── G10: (subquery G12 &{<nil> 0 unknown false})
├── G10: (subquery G12 &{<nil> 0 false unknown false})
├── G11: (false)
├── G12: (project G13 G14)
├── G13: (limit G1 G8)
Expand Down

0 comments on commit a8a3b75

Please sign in to comment.