Skip to content

Commit

Permalink
opt: build ANY expressions as regular subqueries within UDFs
Browse files Browse the repository at this point in the history
To support execution of ANY expressions within UDFs, the optimizer
builds them as regular subqueries with a novel transformation. The
transformation simulates the peculiar behavior of ANY expressions.

For example, consider the expression:

    i <comp> ANY (<subquery>)

The logic for evaluating this comparison is:

  1. If the subquery results in zero rows, then the expression evaluates
     to false, even if i is NULL.
  2. Otherwise, if the comparison between i and any value returned by
     the subquery is true, then the expression evaluates to true.
  3. Otherwise, if any values returned by the subquery are NULL, then
     the expression evaluates to NULL.
  4. Otherwise, if i is NULL, then the expression evaluates to NULL.
  5. Otherwise, the expression evaluates to false.

We use the following transformation to express this logic:

    i = ANY (SELECT a FROM t)
    =>
    SELECT count > 0 AND (bool_or OR (null_count > 0 AND NULL))
    FROM (
      SELECT
        count(*) AS count,
        bool_or(cmp) AS bool_or,
        count(*) FILTER (is_null) AS null_count
      FROM (
        SELECT a = i AS cmp, a IS NULL AS is_null
        FROM (
          SELECT a FROM t
        )
      )
    )

By constructing ANY expressions within UDFs as subqueries, we avoid
adding complexity to the execution engine, which is not currently
capable of evaluating ANY expressions within the context of a UDF.

We only perform this transformation when building ANY expressions within
a UDF because this transformation may lead to query plans with slow
apply-joins. In the future, if we can eliminate the apply-joins we may
be able to apply this transformation universally, and remove existing
logic for ANY expression evaluation in the execution engine.

Fixes #87291

Release note: None
  • Loading branch information
mgartner committed Mar 10, 2023
1 parent 22c389c commit 9a9b8ce
Show file tree
Hide file tree
Showing 5 changed files with 467 additions and 9 deletions.
132 changes: 128 additions & 4 deletions pkg/sql/logictest/testdata/logic_test/udf
Original file line number Diff line number Diff line change
Expand Up @@ -2812,6 +2812,130 @@ SELECT a, sub_two() FROM sub_all
5 2
6 2

subtest any_subquery

statement ok
CREATE TABLE any_tab (
a INT,
b INT
)

statement ok
CREATE FUNCTION any_fn(i INT) RETURNS BOOL LANGUAGE SQL AS $$
SELECT i = ANY(SELECT a FROM any_tab)
$$

statement ok
CREATE FUNCTION any_fn_lt(i INT) RETURNS BOOL LANGUAGE SQL AS $$
SELECT i < ANY(SELECT a FROM any_tab)
$$

statement ok
CREATE FUNCTION any_fn_tuple(i INT, j INT) RETURNS BOOL LANGUAGE SQL AS $$
SELECT (i, j) = ANY(SELECT a, b FROM any_tab)
$$

# If the subquery returns no rows, the result should always be false.
query BBB
SELECT any_fn(1), any_fn(4), any_fn(NULL::INT)
----
false false false

query BBB
SELECT any_fn_lt(1), any_fn_lt(4), any_fn_lt(NULL::INT)
----
false false false

query BBB
SELECT any_fn_tuple(1, 10), any_fn_tuple(1, 20), any_fn_tuple(NULL::INT, NULL::INT)
----
false false false

statement ok
INSERT INTO any_tab VALUES (1, 10), (3, 30)

query BBB
SELECT any_fn(1), any_fn(4), any_fn(NULL::INT)
----
true false NULL

query BBB
SELECT any_fn_lt(1), any_fn_lt(4), any_fn_lt(NULL::INT)
----
true false NULL

query BBB
SELECT any_fn_tuple(1, 10), any_fn_tuple(1, 20), any_fn_tuple(NULL::INT, NULL::INT)
----
true false NULL

statement ok
INSERT INTO any_tab VALUES (NULL, NULL)

query BBB
SELECT any_fn(1), any_fn(4), any_fn(NULL::INT)
----
true NULL NULL

query BBB
SELECT any_fn_lt(1), any_fn_lt(4), any_fn_lt(NULL::INT)
----
true NULL NULL

query BBB
SELECT any_fn_tuple(1, 10), any_fn_tuple(1, 20), any_fn_tuple(NULL::INT, NULL::INT)
----
true NULL NULL

statement ok
CREATE FUNCTION any_fn2(i INT) RETURNS SETOF INT LANGUAGE SQL AS $$
SELECT b FROM (VALUES (1), (2), (3), (NULL)) v(b)
WHERE b = ANY (SELECT a FROM any_tab WHERE a <= i)
$$

query I
SELECT any_fn2(2)
----
1

query I
SELECT any_fn2(3)
----
1
3

subtest all_subquery

statement ok
CREATE TABLE all_tab (a INT)

statement ok
CREATE FUNCTION all_fn(i INT) RETURNS BOOL LANGUAGE SQL AS $$
SELECT i = ALL(SELECT a FROM all_tab)
$$

# If the subquery returns no rows, the result should always be true.
query BBB
SELECT all_fn(1), all_fn(2), all_fn(NULL::INT)
----
true true true

statement ok
INSERT INTO all_tab VALUES (1), (1);

query BBB
SELECT all_fn(1), all_fn(2), all_fn(NULL::INT)
----
true false NULL

statement ok
INSERT INTO all_tab VALUES (NULL);

query BBB
SELECT all_fn(1), all_fn(2), all_fn(NULL::INT)
----
NULL false NULL


subtest variadic

Expand Down Expand Up @@ -2922,10 +3046,10 @@ SELECT oid, proname, pronamespace, proowner, prolang, proleakproof, proisstrict,
FROM pg_catalog.pg_proc WHERE proname IN ('f_93314', 'f_93314_alias', 'f_93314_comp', 'f_93314_comp_t')
ORDER BY oid;
----
100264 f_93314 105 1546506610 14 false false false v 0 100263 · {} NULL SELECT i, e FROM test.public.t_93314 ORDER BY i LIMIT 1;
100266 f_93314_alias 105 1546506610 14 false false false v 0 100265 · {} NULL SELECT i, e FROM test.public.t_93314_alias ORDER BY i LIMIT 1;
100270 f_93314_comp 105 1546506610 14 false false false v 0 100267 · {} NULL SELECT (1, 2);
100271 f_93314_comp_t 105 1546506610 14 false false false v 0 100269 · {} NULL SELECT a, c FROM test.public.t_93314_comp LIMIT 1;
100271 f_93314 105 1546506610 14 false false false v 0 100270 · {} NULL SELECT i, e FROM test.public.t_93314 ORDER BY i LIMIT 1;
100273 f_93314_alias 105 1546506610 14 false false false v 0 100272 · {} NULL SELECT i, e FROM test.public.t_93314_alias ORDER BY i LIMIT 1;
100277 f_93314_comp 105 1546506610 14 false false false v 0 100274 · {} NULL SELECT (1, 2);
100278 f_93314_comp_t 105 1546506610 14 false false false v 0 100276 · {} NULL SELECT a, c FROM test.public.t_93314_comp LIMIT 1;

# Regression test for #95240. Strict UDFs that are inlined should result in NULL
# when presented with NULL arguments.
Expand Down
7 changes: 7 additions & 0 deletions pkg/sql/opt/optbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,13 @@ type Builder struct {
// are disabled and only statements whitelisted are allowed.
insideFuncDef bool

// udfDepth tracks the depth of UDFs within which the current expressions
// are being built. It is incremented before building statements in a UDF
// and decremented after all the statements in a UDF have been built. If
// udfDepth is greater than zero, then the builder is currently building
// expressions within one or more UDFs.
udfDepth int

// If set, we are collecting view dependencies in schemaDeps. This can only
// happen inside view/function definitions.
//
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/opt/optbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ func (b *Builder) buildUDF(
// Build an expression for each statement in the function body.
rels := make(memo.RelListExpr, len(stmts))
isSetReturning := o.Class == tree.GeneratorClass
b.udfDepth++
for i := range stmts {
stmtScope := b.buildStmt(stmts[i].AST, nil /* desiredTypes */, bodyScope)
expr := stmtScope.expr
Expand Down Expand Up @@ -768,6 +769,7 @@ func (b *Builder) buildUDF(
PhysProps: physProps,
}
}
b.udfDepth--

out = b.factory.ConstructUDF(
args,
Expand Down
126 changes: 121 additions & 5 deletions pkg/sql/opt/optbuilder/subquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,11 +372,18 @@ func (b *Builder) buildMultiRowSubquery(
))
}

// Construct the outer Any(...) operator.
out = b.factory.ConstructAny(input, scalar, &memo.SubqueryPrivate{
Cmp: cmp,
OriginalExpr: s.Subquery,
})
if b.udfDepth > 0 {
// Any expressions are cannot be built by the optimizer within a UDF, so
// building them as regular subqueries instead.
out = b.buildAnyAsSubquery(scalar, cmp, input, s.Subquery)
} else {
// Construct the outer Any(...) operator.
out = b.factory.ConstructAny(input, scalar, &memo.SubqueryPrivate{
Cmp: cmp,
OriginalExpr: s.Subquery,
})
}

switch c.Operator.Symbol {
case treecmp.NotIn, treecmp.All:
// NOT Any(...)
Expand All @@ -386,6 +393,115 @@ func (b *Builder) buildMultiRowSubquery(
return out, outScope
}

// buildAnyAsSubquery builds an Any expression as a SubqueryExpr. An Any
// expression such as the one below has peculiar behavior.
//
// i <comp> ANY (<subquery>)
//
// The logic for evaluating this comparison is:
//
// 1. If the subquery results in zero rows, then the expression evaluates to
// false, even if i is NULL.
// 2. Otherwise, if the comparison between i and any value returned by the
// subquery is true, then the expression evaluates to true.
// 3. Otherwise, if any values returned by the subquery are NULL, then the
// expression evaluates to NULL.
// 4. Otherwise, if i is NULL, then the expression evaluates to NULL.
// 5. Otherwise, the expression evaluates to false.
//
// We use the following transformation to express this logic:
//
// i = ANY (SELECT a FROM t)
// =>
// SELECT count > 0 AND (bool_or OR (null_count > 0 AND NULL))
// FROM (
// SELECT
// count(*) AS count,
// bool_or(cmp) AS bool_or,
// count(*) FILTER (is_null) AS null_count
// FROM (
// SELECT a = i AS cmp, a IS NULL AS is_null
// FROM (
// SELECT a FROM t
// )
// )
// )
func (b *Builder) buildAnyAsSubquery(
left opt.ScalarExpr, op opt.Operator, sub memo.RelExpr, origExpr *tree.Subquery,
) opt.ScalarExpr {
f := b.factory
md := f.Metadata()
subCol := sub.Relational().OutputCols.SingleColumn()

// Create projections of:
// left <op> subCol
// left IS NULL
cmpCol := md.AddColumn("cmp", types.Bool)
isNullCol := md.AddColumn("is_null", types.Bool)
var isNull opt.ScalarExpr
if md.ColumnMeta(subCol).Type.Family() == types.TupleFamily {
// If the subquery results in a tuple, we must use an IsTupleNullExpr.
isNull = f.ConstructIsTupleNull(f.ConstructVariable(subCol))
} else {
isNull = f.ConstructIs(f.ConstructVariable(subCol), memo.NullSingleton)
}
projections := memo.ProjectionsExpr{
f.ConstructProjectionsItem(
b.constructComparisonWithOp(op, left, f.ConstructVariable(subCol)),
cmpCol,
),
f.ConstructProjectionsItem(isNull, isNullCol),
}
out := f.ConstructProject(sub, projections, opt.ColSet{} /* passthrough */)

// Create aggregations for:
// count(*)
// bool_or(cmpCol)
// count(*) FILTER (isNullCol)
countCol := md.AddColumn("count", types.Int)
boolOrCol := md.AddColumn("bool_or", types.Bool)
nullCountCol := md.AddColumn("null_count", types.Int)
aggs := memo.AggregationsExpr{
f.ConstructAggregationsItem(f.ConstructCountRows(), countCol),
f.ConstructAggregationsItem(f.ConstructBoolOr(f.ConstructVariable(cmpCol)), boolOrCol),
f.ConstructAggregationsItem(
f.ConstructAggFilter(
f.ConstructCountRows(),
f.ConstructVariable(isNullCol),
),
nullCountCol,
),
}
out = f.ConstructScalarGroupBy(out, aggs, &memo.GroupingPrivate{})

// Create a projection of:
// countCol > 0 AND (boolOrCol OR (nullCountCol > 0 AND NULL))
resCol := md.AddColumn("any", types.Bool)
resultProj := memo.ProjectionsExpr{
f.ConstructProjectionsItem(
f.ConstructAnd(
f.ConstructGt(
f.ConstructVariable(countCol),
f.ConstructConstVal(tree.DZero, types.Int),
),
f.ConstructOr(
f.ConstructVariable(boolOrCol),
f.ConstructAnd(
f.ConstructGt(
f.ConstructVariable(nullCountCol),
f.ConstructConstVal(tree.DZero, types.Int),
),
memo.NullSingleton,
),
),
),
resCol,
),
}
out = b.factory.ConstructProject(out, resultProj, opt.ColSet{})
return b.factory.ConstructSubquery(out, &memo.SubqueryPrivate{OriginalExpr: origExpr})
}

var _ tree.Expr = &subquery{}
var _ tree.TypedExpr = &subquery{}

Expand Down
Loading

0 comments on commit 9a9b8ce

Please sign in to comment.