diff --git a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go index 737f13d03980..50abb799c34b 100644 --- a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go +++ b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go @@ -301,6 +301,13 @@ func TestTenantLogic_as_of( runLogicTest(t, "as_of") } +func TestTenantLogic_asyncpg( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "asyncpg") +} + func TestTenantLogic_auto_span_config_reconciliation_job( t *testing.T, ) { diff --git a/pkg/sql/logictest/testdata/logic_test/asyncpg b/pkg/sql/logictest/testdata/logic_test/asyncpg new file mode 100644 index 000000000000..2b2fe6b085f4 --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/asyncpg @@ -0,0 +1,229 @@ +# Tests for queries made by asyncpg. + +# Regression test for #71908 and #80169. +query TTTTTTTTTTITTI rowsort +---------------------------------------------------------------------------------------- +WITH RECURSIVE + typeinfo_tree + ( + oid, ns, name, kind, basetype, elemtype, elemdelim, range_subtype, attrtypoids, attrnames, depth + ) + AS ( + SELECT + ti.oid, + ti.ns, + ti.name, + ti.kind, + ti.basetype, + ti.elemtype, + ti.elemdelim, + ti.range_subtype, + ti.attrtypoids, + ti.attrnames, + 0 + FROM + ( + SELECT + t.oid AS oid, + ns.nspname AS ns, + t.typname AS name, + t.typtype AS kind, + CASE + WHEN t.typtype = 'd' + THEN ( + WITH RECURSIVE + typebases (oid, depth) + AS ( + SELECT + t2.typbasetype AS oid, 0 AS depth + FROM + pg_type AS t2 + WHERE + t2.oid = t.oid + UNION ALL + SELECT + t2.typbasetype AS oid, + tb.depth + 1 AS depth + FROM + pg_type AS t2, typebases AS tb + WHERE + tb.oid = t2.oid AND t2.typbasetype != 0 + ) + SELECT + oid + FROM + typebases + ORDER BY + depth DESC + LIMIT + 1 + ) + ELSE NULL + END + AS basetype, + t.typelem AS elemtype, + elem_t.typdelim AS elemdelim, + range_t.rngsubtype AS range_subtype, + CASE + WHEN t.typtype = 'c' + THEN ( + SELECT + array_agg(ia.atttypid ORDER BY ia.attnum) + FROM + pg_attribute AS ia + INNER JOIN pg_class AS c ON ia.attrelid = c.oid + WHERE + ia.attnum > 0 + AND NOT ia.attisdropped + AND c.reltype = t.oid + ) + ELSE NULL + END + AS attrtypoids, + CASE + WHEN t.typtype = 'c' + THEN ( + SELECT + array_agg(ia.attname::STRING ORDER BY ia.attnum) + FROM + pg_attribute AS ia + INNER JOIN pg_class AS c ON ia.attrelid = c.oid + WHERE + ia.attnum > 0 + AND NOT ia.attisdropped + AND c.reltype = t.oid + ) + ELSE NULL + END + AS attrnames + FROM + pg_catalog.pg_type AS t + INNER JOIN pg_catalog.pg_namespace AS ns ON + ns.oid = t.typnamespace + LEFT JOIN pg_type AS elem_t ON + t.typlen = -1 + AND t.typelem != 0 + AND t.typelem = elem_t.oid + LEFT JOIN pg_range AS range_t ON t.oid = range_t.rngtypid + ) + AS ti + WHERE + ti.oid = ANY ARRAY[21, 23]::OID[] + UNION ALL + SELECT + ti.oid, + ti.ns, + ti.name, + ti.kind, + ti.basetype, + ti.elemtype, + ti.elemdelim, + ti.range_subtype, + ti.attrtypoids, + ti.attrnames, + tt.depth + 1 + FROM + ( + SELECT + t.oid AS oid, + ns.nspname AS ns, + t.typname AS name, + t.typtype AS kind, + CASE + WHEN t.typtype = 'd' + THEN ( + WITH RECURSIVE + typebases (oid, depth) + AS ( + SELECT + t2.typbasetype AS oid, 0 AS depth + FROM + pg_type AS t2 + WHERE + t2.oid = t.oid + UNION ALL + SELECT + t2.typbasetype AS oid, + tb.depth + 1 AS depth + FROM + pg_type AS t2, typebases AS tb + WHERE + tb.oid = t2.oid + AND t2.typbasetype != 0 + ) + SELECT + oid + FROM + typebases + ORDER BY + depth DESC + LIMIT + 1 + ) + ELSE NULL + END + AS basetype, + t.typelem AS elemtype, + elem_t.typdelim AS elemdelim, + range_t.rngsubtype AS range_subtype, + CASE + WHEN t.typtype = 'c' + THEN ( + SELECT + array_agg(ia.atttypid ORDER BY ia.attnum) + FROM + pg_attribute AS ia + INNER JOIN pg_class AS c ON ia.attrelid = c.oid + WHERE + ia.attnum > 0 + AND NOT ia.attisdropped + AND c.reltype = t.oid + ) + ELSE NULL + END + AS attrtypoids, + CASE + WHEN t.typtype = 'c' + THEN ( + SELECT + array_agg(ia.attname::STRING ORDER BY ia.attnum) + FROM + pg_attribute AS ia + INNER JOIN pg_class AS c ON ia.attrelid = c.oid + WHERE + ia.attnum > 0 + AND NOT ia.attisdropped + AND c.reltype = t.oid + ) + ELSE NULL + END + AS attrnames + FROM + pg_catalog.pg_type AS t + INNER JOIN pg_catalog.pg_namespace AS ns ON + ns.oid = t.typnamespace + LEFT JOIN pg_type AS elem_t ON + t.typlen = -1 + AND t.typelem != 0 + AND t.typelem = elem_t.oid + LEFT JOIN pg_range AS range_t ON t.oid = range_t.rngtypid + ) + AS ti, + typeinfo_tree AS tt + WHERE + (tt.elemtype IS NOT NULL AND ti.oid = tt.elemtype) + OR (tt.attrtypoids IS NOT NULL AND ti.oid = ANY tt.attrtypoids) + OR (tt.range_subtype IS NOT NULL AND ti.oid = tt.range_subtype) + ) +SELECT + DISTINCT *, + basetype::REGTYPE::STRING AS basetype_name, + elemtype::REGTYPE::STRING AS elemtype_name, + range_subtype::REGTYPE::STRING AS range_subtype_name +FROM + typeinfo_tree +ORDER BY + depth DESC +---- +21 pg_catalog int2 b NULL 0 NULL NULL NULL NULL 0 NULL - NULL +23 pg_catalog int4 b NULL 0 NULL NULL NULL NULL 0 NULL - NULL diff --git a/pkg/sql/logictest/testdata/logic_test/subquery b/pkg/sql/logictest/testdata/logic_test/subquery index 5fd63647833c..a862e3c841ad 100644 --- a/pkg/sql/logictest/testdata/logic_test/subquery +++ b/pkg/sql/logictest/testdata/logic_test/subquery @@ -444,6 +444,174 @@ query I SELECT col0 FROM tab4 WHERE (col0 <= 0 AND col4 <= 5.38) OR (col4 IN (SELECT col1 FROM tab4 WHERE col1 > 8.27)) AND (col3 <= 5 AND (col3 BETWEEN 7 AND 9)) ---- +subtest correlated + +statement ok +CREATE TABLE corr ( + k INT PRIMARY KEY, + i INT +) + +statement ok +INSERT INTO corr VALUES (1, 10), (2, 22), (3, 30), (4, 40), (5, 50) + +query II rowsort +SELECT * FROM corr +WHERE CASE WHEN k < 5 THEN k*10 = (SELECT i FROM corr tmp WHERE k = corr.k) END +---- +1 10 +3 30 +4 40 + +query III colnames,rowsort +SELECT k, i, CASE WHEN k > 1 THEN (SELECT i FROM corr tmp WHERE k = corr.k-1) END AS prev_i +FROM corr +---- +k i prev_i +1 10 NULL +2 22 10 +3 30 22 +4 40 30 +5 50 40 + +# A test similar to the previous showing that the physical ordering requested by +# the ORDER BY is respected when re-optimizing the subquery. +query IIR colnames,rowsort +SELECT k, i, + CASE WHEN k > 1 THEN (SELECT i/1 FROM corr tmp WHERE i < corr.i ORDER BY i DESC LIMIT 1) END prev_i +FROM corr +---- +k i prev_i +1 10 NULL +2 22 10 +3 30 22 +4 40 30 +5 50 40 + +# The same query as above, but as a prepared statement with placeholders in the +# subquery. +statement ok +PREPARE corr_s1(INT) AS +SELECT k, i, + CASE WHEN k > 1 THEN (SELECT i/$1 FROM corr tmp WHERE i < corr.i ORDER BY i DESC LIMIT $1) END prev_i +FROM corr + +query IIR colnames,rowsort +EXECUTE corr_s1(1) +---- +k i prev_i +1 10 NULL +2 22 10 +3 30 22 +4 40 30 +5 50 40 + +# A subquery with a star-expansion. +query IIR colnames,rowsort +SELECT k, i, + CASE WHEN k > 1 THEN ( + SELECT * FROM (VALUES (33::DECIMAL)) v(i) + UNION ALL + SELECT i/1 FROM corr tmp WHERE i < corr.i + ORDER BY i DESC LIMIT 1 + ) END prev_i +FROM corr +---- +k i prev_i +1 10 NULL +2 22 33 +3 30 33 +4 40 33 +5 50 40 + +# TODO(mgartner): Execute correlated EXISTS subqueries. +statement error could not decorrelate subquery +SELECT * FROM corr +WHERE CASE WHEN k < 5 THEN EXISTS (SELECT i FROM corr tmp WHERE i = corr.k*10) END + +# TODO(mgartner): Execute correlated ANY subqueries. +statement error could not decorrelate subquery +SELECT * FROM corr +WHERE CASE WHEN k < 5 THEN k*10 = ANY (SELECT i FROM corr tmp WHERE k <= corr.k) END + +# Correlated subqueries can reference outer with expressions. +query III colnames,rowsort +WITH w AS MATERIALIZED ( + (VALUES (1)) +) +SELECT k, i, + CASE WHEN k > 0 THEN (SELECT i+corr.i FROM corr tmp UNION ALL SELECT * FROM w LIMIT 1) END i_plus_first_i +FROM corr +---- +k i i_plus_first_i +1 10 20 +2 22 32 +3 30 40 +4 40 50 +5 50 60 + +# Uncorrelated subqueries within correlated subqueries can reference outer with +# expressions. +query III colnames,rowsort +WITH w AS MATERIALIZED ( + (VALUES (1)) +) +SELECT k, i, + CASE WHEN k > 0 THEN (SELECT i+corr.i FROM corr tmp WHERE k = (SELECT * FROM w)) END i_plus_first_i +FROM corr +---- +k i i_plus_first_i +1 10 20 +2 22 32 +3 30 40 +4 40 50 +5 50 60 + +# WITH within subquery that is shadowing outer WITH. +query III colnames,rowsort +WITH w(i) AS MATERIALIZED ( + (VALUES (1)) +) +SELECT k, i, + CASE WHEN k > 0 THEN ( + WITH w(i) AS MATERIALIZED ( + (VALUES (2)) + ) + SELECT * FROM w UNION ALL SELECT i+corr.i FROM corr tmp LIMIT 1 + ) END w +FROM corr +UNION ALL +SELECT NULL, NULL, i FROM w +---- +k i w +1 10 2 +2 22 2 +3 30 2 +4 40 2 +5 50 2 +NULL NULL 1 + +statement ok +CREATE TABLE corr2 (i INT); + +# A NOT MATERIALIZED CTE with a mutation. +# TODO(mgartner): Lift this restriction. +statement error could not decorrelate subquery +WITH tmp AS NOT MATERIALIZED (INSERT INTO corr2 VALUES (1) RETURNING i) +SELECT * FROM corr +WHERE CASE WHEN k < 5 THEN k+1 = (SELECT i FROM tmp WHERE i = corr.k) END + +# The statement above should perform the INSERT only once. +# TODO(mgartner): This should return 1 when the statement above can be executed +# successfully. +query I +SELECT count(*) FROM corr2 +---- +0 + + +subtest regressions + statement ok CREATE TABLE z (z INT PRIMARY KEY) diff --git a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go index f75661af5ace..47766181a95b 100644 --- a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go @@ -261,6 +261,13 @@ func TestLogic_as_of( runLogicTest(t, "as_of") } +func TestLogic_asyncpg( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "asyncpg") +} + func TestLogic_auto_span_config_reconciliation_job( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go index 64450445b8c3..7aed138e125b 100644 --- a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go @@ -261,6 +261,13 @@ func TestLogic_as_of( runLogicTest(t, "as_of") } +func TestLogic_asyncpg( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "asyncpg") +} + func TestLogic_auto_span_config_reconciliation_job( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist/generated_test.go b/pkg/sql/logictest/tests/fakedist/generated_test.go index df09b0cd5d1e..dc2a2c990652 100644 --- a/pkg/sql/logictest/tests/fakedist/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist/generated_test.go @@ -261,6 +261,13 @@ func TestLogic_as_of( runLogicTest(t, "as_of") } +func TestLogic_asyncpg( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "asyncpg") +} + func TestLogic_auto_span_config_reconciliation_job( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go index a9cf11b1b661..76f32853792b 100644 --- a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go +++ b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go @@ -261,6 +261,13 @@ func TestLogic_as_of( runLogicTest(t, "as_of") } +func TestLogic_asyncpg( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "asyncpg") +} + func TestLogic_auto_span_config_reconciliation_job( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-vec-off/generated_test.go b/pkg/sql/logictest/tests/local-vec-off/generated_test.go index 2cc4660507a1..e8f3caf8aeae 100644 --- a/pkg/sql/logictest/tests/local-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/local-vec-off/generated_test.go @@ -261,6 +261,13 @@ func TestLogic_as_of( runLogicTest(t, "as_of") } +func TestLogic_asyncpg( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "asyncpg") +} + func TestLogic_auto_span_config_reconciliation_job( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local/generated_test.go b/pkg/sql/logictest/tests/local/generated_test.go index 189c94f2f5fc..3b8ae8265e1f 100644 --- a/pkg/sql/logictest/tests/local/generated_test.go +++ b/pkg/sql/logictest/tests/local/generated_test.go @@ -261,6 +261,13 @@ func TestLogic_as_of( runLogicTest(t, "as_of") } +func TestLogic_asyncpg( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "asyncpg") +} + func TestLogic_auto_span_config_reconciliation_job( t *testing.T, ) { diff --git a/pkg/sql/opt/exec/execbuilder/scalar.go b/pkg/sql/opt/exec/execbuilder/scalar.go index 15a85c8eec1d..85ed373bc0a7 100644 --- a/pkg/sql/opt/exec/execbuilder/scalar.go +++ b/pkg/sql/opt/exec/execbuilder/scalar.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/exec" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/norm" + "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" "github.com/cockroachdb/cockroach/pkg/sql/opt/xform" "github.com/cockroachdb/cockroach/pkg/sql/sem/builtins/builtinsregistry" "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" @@ -540,6 +541,8 @@ func (b *Builder) buildItem(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Ty func (b *Builder) buildAny(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.TypedExpr, error) { any := scalar.(*memo.AnyExpr) // We cannot execute correlated subqueries. + // TODO(mgartner): Plan correlated ANY subqueries using tree.RoutineExpr. + // See buildSubquery. if !any.Input.Relational().OuterCols.Empty() { return nil, b.decorrelationError() } @@ -581,6 +584,8 @@ func (b *Builder) buildExistsSubquery( ) (tree.TypedExpr, error) { exists := scalar.(*memo.ExistsExpr) // We cannot execute correlated subqueries. + // TODO(mgartner): Plan correlated EXISTS subqueries using tree.RoutineExpr. + // See buildSubquery. if !exists.Input.Relational().OuterCols.Empty() { return nil, b.decorrelationError() } @@ -610,16 +615,56 @@ func (b *Builder) buildSubquery( return nil, errors.Errorf("subquery input with multiple columns") } - // We cannot execute correlated subqueries. - // TODO(mgartner): We can execute correlated subqueries by making them - // routines, like we do below. - if !input.Relational().OuterCols.Empty() { - return nil, b.decorrelationError() + // Build correlated subqueries as lazily-evaluated routines. + if outerCols := input.Relational().OuterCols; !outerCols.Empty() { + // Routines do not yet support mutations. + // TODO(mgartner): Lift this restriction once routines support + // mutations. + if input.Relational().CanMutate { + return nil, b.decorrelationError() + } + + // The outer columns of the subquery become the parameters of the + // routine. + params := outerCols.ToList() + + // The outer columns of the subquery, as indexed columns, are the + // arguments of the routine. + // The arguments are indexed variables representing the outer columns. + args := make(tree.TypedExprs, len(params)) + for i := range args { + args[i] = b.indexedVar(ctx, b.mem.Metadata(), params[i]) + } + + // Create a single-element RelListExpr representing the subquery. + outputCol := input.Relational().OutputCols.SingleColumn() + aliasedCol := opt.AliasedColumn{ + Alias: b.mem.Metadata().ColumnMeta(outputCol).Alias, + ID: outputCol, + } + stmts := memo.RelListExpr{memo.RelRequiredPropsExpr{ + RelExpr: input, + PhysProps: &physical.Required{ + Presentation: physical.Presentation{aliasedCol}, + }, + }} + + // Create a tree.RoutinePlanFn that can plan the single statement + // representing the subquery. + planFn := b.buildRoutinePlanFn(params, stmts, true /* allowOuterWithRefs */) + return tree.NewTypedRoutineExpr( + "subquery", + args, + planFn, + 1, /* numStmts */ + subquery.Typ, + false, /* enableStepping */ + true, /* calledOnNullInput */ + ), nil } + // Build lazily-evaluated, uncorrelated subqueries as routines. if b.planLazySubqueries { - // Build lazily-evaluated subqueries as routines. - // // Note: We reuse the optimizer and memo from the original expression // because we don't need to optimize the subquery input any further. // It's already been fully optimized because it is uncorrelated and has @@ -629,11 +674,14 @@ func (b *Builder) buildSubquery( // once. We should cache their result to avoid all this overhead for // every invocation. inputRowCount := int64(input.Relational().Statistics().RowCountIfAvailable()) + withExprs := make([]builtWithExpr, len(b.withExprs)) + copy(withExprs, b.withExprs) planFn := func( ctx context.Context, ref tree.RoutineExecFactory, stmtIdx int, args tree.Datums, ) (tree.RoutinePlan, error) { ef := ref.(exec.Factory) eb := New(ctx, ef, b.optimizer, b.mem, b.catalog, input, b.evalCtx, false /* allowAutoCommit */, b.IsANSIDML) + eb.withExprs = withExprs eb.disableTelemetry = true eb.planLazySubqueries = true plan, err := eb.buildRelational(input) @@ -723,7 +771,8 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ } // Create a tree.RoutinePlanFn that can plan the statements in the UDF body. - planFn := b.buildRoutinePlanFn(udf.Params, udf.Body) + // TODO(mgartner): Add support for WITH expressions inside UDF bodies. + planFn := b.buildRoutinePlanFn(udf.Params, udf.Body, false /* allowOuterWithRefs */) // Enable stepping for volatile functions so that statements within the UDF // see mutations made by the invoking statement and by previous executed @@ -742,9 +791,9 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ } // buildRoutinePlanFn returns a tree.RoutinePlanFn that can plan the statements -// in a routine. +// in a routine that has one or more arguments. func (b *Builder) buildRoutinePlanFn( - params opt.ColList, stmts memo.RelListExpr, + params opt.ColList, stmts memo.RelListExpr, allowOuterWithRefs bool, ) tree.RoutinePlanFn { // argOrd returns the ordinal of the argument within the arguments list that // can be substituted for each reference to the given function parameter @@ -759,6 +808,13 @@ func (b *Builder) buildRoutinePlanFn( return 0, false } + // We will pre-populate the withExprs of the new execbuilder. + var withExprs []builtWithExpr + if allowOuterWithRefs { + withExprs = make([]builtWithExpr, len(b.withExprs)) + copy(withExprs, b.withExprs) + } + // Plan the statements in a separate memo. We use an exec.Factory passed to // the closure rather than b.factory to support executing plans that are // generated with explain.Factory. @@ -777,28 +833,53 @@ func (b *Builder) buildRoutinePlanFn( // Copy the expression into a new memo. Replace parameter references // with argument datums. + addedWithBindings := false var replaceFn norm.ReplaceFunc replaceFn = func(e opt.Expr) opt.Expr { - if v, ok := e.(*memo.VariableExpr); ok { - if ord, ok := argOrd(v.Col); ok { - return f.ConstructConstVal(args[ord], v.Typ) + switch t := e.(type) { + case *memo.VariableExpr: + if ord, ok := argOrd(t.Col); ok { + return f.ConstructConstVal(args[ord], t.Typ) + } + + case *memo.WithScanExpr: + // Allow referring to "outer" With expressions, if + // allowOuterWithRefs is true. The bound expressions are not + // part of this Memo, but they are used only for their + // relational properties, which should be valid. + // + // We must add all With expressions to the metadata even if they + // aren't referred to directly because they might be referred to + // transitively through other With expressions. For example, if + // stmt refers to With expression &1, and &1 refers to With + // expression &2, we must include &2 in the metadata so that its + // relational properties are available. See #87733. + // + // We lazily add these With expressions to the metadata here + // because the call to Factory.CopyAndReplace below clears With + // expressions in the metadata. + if allowOuterWithRefs && !addedWithBindings { + b.mem.Metadata().ForEachWithBinding(func(id opt.WithID, expr opt.Expr) { + f.Metadata().AddWithBinding(id, expr) + }) + addedWithBindings = true } + // Fall through. } return f.CopyAndReplaceDefault(e, replaceFn) } f.CopyAndReplace(stmt, stmt.PhysProps, replaceFn) // Optimize the memo. - newRightSide, err := o.Optimize() + optimizedExpr, err := o.Optimize() if err != nil { return nil, err } // Build the memo into a plan. - // TODO(mgartner): Add support for WITH expressions inside UDF bodies. - // TODO(mgartner): Add support for subqueries inside UDF bodies. ef := ref.(exec.Factory) - eb := New(ctx, ef, &o, f.Memo(), b.catalog, newRightSide, b.evalCtx, false /* allowAutoCommit */, b.IsANSIDML) + eb := New(ctx, ef, &o, f.Memo(), b.catalog, optimizedExpr, b.evalCtx, false /* allowAutoCommit */, b.IsANSIDML) + eb.withExprs = withExprs eb.disableTelemetry = true eb.planLazySubqueries = true plan, err := eb.Build() @@ -808,7 +889,7 @@ func (b *Builder) buildRoutinePlanFn( // inner expression. fmtFlags := memo.ExprFmtHideQualifications | memo.ExprFmtHideScalars | memo.ExprFmtHideTypes - explainOpt := o.FormatExpr(newRightSide, fmtFlags) + explainOpt := o.FormatExpr(optimizedExpr, fmtFlags) err = errors.WithDetailf(err, "routineExpr:\n%s", explainOpt) } return nil, err diff --git a/pkg/sql/opt/exec/execbuilder/testdata/subquery b/pkg/sql/opt/exec/execbuilder/testdata/subquery index fecebf6fc26d..b62c35c720a6 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/subquery +++ b/pkg/sql/opt/exec/execbuilder/testdata/subquery @@ -451,3 +451,64 @@ vectorized: true estimated row count: 1,000 (missing stats) table: abc@abc_pkey spans: FULL SCAN + +statement ok +CREATE TABLE corr ( + k INT PRIMARY KEY, + i INT, + FAMILY (k, i) +); +INSERT INTO corr VALUES (1, 10), (2, 22), (3, 30), (4, 40), (5, 50) + +# Case where the subquery in a filter cannot be hoisted into an apply-join. +query T +EXPLAIN (VERBOSE) +SELECT * FROM corr +WHERE CASE WHEN k < 5 THEN k*10 = (SELECT i FROM corr tmp WHERE k = corr.k) END +---- +distribution: local +vectorized: true +· +• filter +│ columns: (k, i) +│ estimated row count: 333 (missing stats) +│ filter: CASE WHEN k < 5 THEN (k * 10) = subquery(k) ELSE CAST(NULL AS BOOL) END +│ +└── • scan + columns: (k, i) + estimated row count: 1,000 (missing stats) + table: corr@corr_pkey + spans: FULL SCAN + +# Case where the subquery in a projection cannot be hoisted into an apply-join. +query T +EXPLAIN (VERBOSE) +SELECT k, i, CASE WHEN k > 1 THEN (SELECT i FROM corr tmp WHERE k = corr.k-1) ELSE 0 END AS prev_i +FROM corr +---- +distribution: local +vectorized: true +· +• render +│ columns: (k, i, prev_i) +│ render prev_i: CASE WHEN k > 1 THEN subquery(k) ELSE 0 END +│ render k: k +│ render i: i +│ +└── • scan + columns: (k, i) + estimated row count: 1,000 (missing stats) + table: corr@corr_pkey + spans: FULL SCAN + +# Each invocation of the subquery is re-optimized, so the scans are constrained +# by constant values substituted for corr.k. +query T kvtrace +SELECT k, i, CASE WHEN k > 1 THEN (SELECT i FROM corr tmp WHERE k = corr.k-1) ELSE 0 END AS prev_i +FROM corr +---- +Scan /Table/110/{1-2} +Scan /Table/110/1/1/0 +Scan /Table/110/1/2/0 +Scan /Table/110/1/3/0 +Scan /Table/110/1/4/0