Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: support array-flatten subqueries within UDFs #98879

Merged
merged 1 commit into from
Mar 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions pkg/sql/logictest/testdata/logic_test/udf
Original file line number Diff line number Diff line change
Expand Up @@ -2945,6 +2945,26 @@ SELECT all_fn(1), all_fn(2), all_fn(NULL::INT)
NULL false NULL


subtest array_flatten

statement ok
CREATE FUNCTION arr(x INT) RETURNS INT[] LANGUAGE SQL AS $$
SELECT ARRAY(VALUES (1), (2), (x));
$$

query T
SELECT arr(10)
----
{1,2,10}

query T
SELECT arr(i) FROM generate_series(1, 3) g(i)
----
{1,2,1}
{1,2,2}
{1,2,3}


subtest variadic

# Variadic UDFS are not currently supported.
Expand Down Expand Up @@ -3054,10 +3074,10 @@ SELECT oid, proname, pronamespace, proowner, prolang, proleakproof, proisstrict,
FROM pg_catalog.pg_proc WHERE proname IN ('f_93314', 'f_93314_alias', 'f_93314_comp', 'f_93314_comp_t')
ORDER BY oid;
----
100271 f_93314 105 1546506610 14 false false false v 0 100270 · {} NULL SELECT i, e FROM test.public.t_93314 ORDER BY i LIMIT 1;
100273 f_93314_alias 105 1546506610 14 false false false v 0 100272 · {} NULL SELECT i, e FROM test.public.t_93314_alias ORDER BY i LIMIT 1;
100277 f_93314_comp 105 1546506610 14 false false false v 0 100274 · {} NULL SELECT (1, 2);
100278 f_93314_comp_t 105 1546506610 14 false false false v 0 100276 · {} NULL SELECT a, c FROM test.public.t_93314_comp LIMIT 1;
100272 f_93314 105 1546506610 14 false false false v 0 100271 · {} NULL SELECT i, e FROM test.public.t_93314 ORDER BY i LIMIT 1;
100274 f_93314_alias 105 1546506610 14 false false false v 0 100273 · {} NULL SELECT i, e FROM test.public.t_93314_alias ORDER BY i LIMIT 1;
100278 f_93314_comp 105 1546506610 14 false false false v 0 100275 · {} NULL SELECT (1, 2);
100279 f_93314_comp_t 105 1546506610 14 false false false v 0 100277 · {} NULL SELECT a, c FROM test.public.t_93314_comp LIMIT 1;

# Regression test for #95240. Strict UDFs that are inlined should result in NULL
# when presented with NULL arguments.
Expand Down
14 changes: 10 additions & 4 deletions pkg/sql/opt/exec/execbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,16 @@ func (b *Builder) buildArrayFlatten(
panic(errors.AssertionFailedf("input to ArrayFlatten should be uncorrelated"))
}

if b.planLazySubqueries {
// The NormalizeArrayFlattenToAgg rule should have converted an
// ArrayFlatten within a UDF into an aggregation.
// We don't yet convert an ArrayFlatten within a correlated subquery
// into an aggregation, so we return a decorrelation error.
// TODO(mgartner): Build an ArrayFlatten within a correlated subquery as
// a Routine, or apply NormalizeArrayFlattenToAgg to all ArrayFlattens.
return nil, b.decorrelationError()
}

root, err := b.buildRelational(af.Input)
if err != nil {
return nil, err
Expand Down Expand Up @@ -762,10 +772,6 @@ func (b *Builder) buildSubquery(
// because we don't need to optimize the subquery input any further.
// It's already been fully optimized because it is uncorrelated and has
// no outer columns.
//
// TODO(mgartner): Uncorrelated subqueries only need to be evaluated
// once. We should cache their result to avoid all this overhead for
// every invocation.
inputRowCount := int64(input.Relational().Statistics().RowCountIfAvailable())
withExprs := make([]builtWithExpr, len(b.withExprs))
copy(withExprs, b.withExprs)
Expand Down
7 changes: 7 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -562,3 +562,10 @@ vectorized: true
estimated row count: 1,000 (missing stats)
table: corr@corr_pkey
spans: FULL SCAN

# Case where a correlated subquery contains an uncorrelated array-flatten
# subquery.
statement error could not decorrelate subquery
SELECT
CASE WHEN k < 5 THEN (SELECT array(SELECT 1) FROM corr tmp WHERE k*10 = corr.k) END
FROM corr
24 changes: 24 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/udf
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,30 @@ SELECT sub_fn4() FROM generate_series(1, 3)
1
1

statement ok
CREATE FUNCTION arr() RETURNS INT[] LANGUAGE SQL AS $$
SELECT ARRAY(VALUES (1), (2));
$$

# A query with a uncorrelated array-flatten within a UDF.
query T
EXPLAIN (VERBOSE) SELECT arr() FROM generate_series(1, 3)
----
distribution: local
vectorized: true
·
• render
│ columns: (arr)
│ render arr: arr()
└── • project set
│ columns: (generate_series)
│ estimated row count: 10
│ render 0: generate_series(1, 3)
└── • emptyrow
columns: ()


subtest regressions

Expand Down
17 changes: 9 additions & 8 deletions pkg/sql/opt/norm/rules/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -383,15 +383,16 @@ $input
# an aggregation). So it's desirable to perform this conversion in the
# interest of decorrelation.
#
# So the outcome is that we can perform uncorrelated ARRAY(...)s over any datatype,
# and correlated ones only over the types that array_agg supports.
# So the outcome is that we can perform uncorrelated ARRAY(...)s over any
# datatype, and correlated ones only over the types that array_agg supports.
#
# Note that optbuilder should have already verified that if the input is
# correlated, then we can array_agg over the input type. Also note that the
# Max1Row operator we introduce is guaranteed to be eliminated as
# MakeArrayAggForFlatten will return a ScalarGroupBy.
# correlated, then we can array_agg over the input type.
[NormalizeArrayFlattenToAgg, Normalize]
(ArrayFlatten $input:(HasOuterCols $input) $subquery:*)
(ArrayFlatten
$input:*
$private:* & (CanNormalizeArrayFlatten $input $private)
)
=>
(Coalesce
[
Expand All @@ -403,7 +404,7 @@ $input
(ArrayAgg
(Variable
$requestedCol:(SubqueryRequestedCol
$subquery
$private
)
)
)
Expand All @@ -414,7 +415,7 @@ $input
]
(MakeGrouping
(MakeEmptyColSet)
(SubqueryOrdering $subquery)
(SubqueryOrdering $private)
)
)
(MakeUnorderedSubquery)
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/norm/scalar_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,9 @@ func (c *CustomFuncs) SplitTupleEq(lhs, rhs *memo.TupleExpr) memo.FiltersExpr {
}
return res
}

// CanNormalizeArrayFlatten returns true if the input is correlated or if the
// ArrayFlatten exists within a UDF.
func (c *CustomFuncs) CanNormalizeArrayFlatten(input memo.RelExpr, p *memo.SubqueryPrivate) bool {
return c.HasOuterCols(input) || p.WithinUDF
}
44 changes: 44 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/scalar
Original file line number Diff line number Diff line change
Expand Up @@ -1985,6 +1985,50 @@ project
├── columns: k:8!null
└── key: (8)

exec-ddl
CREATE FUNCTION arr() RETURNS INT[] LANGUAGE SQL AS $$
SELECT ARRAY(VALUES (1), (2));
$$
----

# Should trigger for uncorrelated ArrayFlatten subqueries within a UDF
norm expect=NormalizeArrayFlattenToAgg format=show-scalars
SELECT arr()
----
values
├── columns: arr:4
├── cardinality: [1 - 1]
├── volatile
├── key: ()
├── fd: ()-->(4)
└── tuple
└── udf: arr
└── body
└── values
├── columns: array:3
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(3)
└── tuple
└── coalesce
├── subquery
│ └── scalar-group-by
│ ├── columns: array_agg:2
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ ├── fd: ()-->(2)
│ ├── values
│ │ ├── columns: column1:1!null
│ │ ├── cardinality: [2 - 2]
│ │ ├── tuple
│ │ │ └── const: 1
│ │ └── tuple
│ │ └── const: 2
│ └── aggregations
│ └── array-agg [as=array_agg:2, outer=(1)]
│ └── variable: column1:1
└── const: ARRAY[]

exec-ddl
CREATE TABLE pg_class (
oid OID NULL,
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/opt/ops/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ define SubqueryPrivate {
# will eventually be output. It is only used for ArrayFlatten expressions.
RequestedCol ColumnID

# WithinUDF is set to true if the subquery exists inside a UDFExpr. It is
# only used for ArrayFlatten expressions.
WithinUDF bool

# Cmp is only used for AnyOp.
Cmp Operator

Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/optbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ func (b *Builder) buildScalar(
OriginalExpr: s.Subquery,
Ordering: s.ordering,
RequestedCol: inCol,
WithinUDF: b.insideUDF,
}
out = b.factory.ConstructArrayFlatten(s.node, &subqueryPrivate)

Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/testdata/rules/cycle
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ memo (not optimized, ~6KB, required=[], cycle=[G1->G4->G6->G9->G10->G12->G13->G1
├── G7: (variable v)
├── G8: (const 1)
├── G9: (scalar-list G10 G11)
├── G10: (subquery G12 &{<nil> 0 unknown false})
├── G10: (subquery G12 &{<nil> 0 false unknown false})
├── G11: (false)
├── G12: (project G13 G14)
├── G13: (limit G1 G8)
Expand Down