From 9804fb4756c738714e03caa66b41909711775ffd Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Sat, 10 Sep 2022 23:54:06 -0700 Subject: [PATCH] norm: early inlining of WITH bound expression with VALUES Fixes https://github.com/cockroachlabs/support/issues/1769 This commit implements early inlining of WITH clause bound expressions (at the time when the WithScanExpr is normalized) if the WITH bound expression is a ValuesExpr with all constant expressions or placeholders and it appears in an IN or NOT IN expression, for example: `column IN (VALUES(...))` or `column NOT IN(VALUES(...))`. Not doing early inlining can result in a superfluous join. Normally WITH clause inlining is done when the top-level WithExpr is normalized, but since normalization is done bottom-up, the WithExpr is processed last, making the inlining too late to trigger other normalization rules. Release note (performance improvement): This patch adds early inlining of VALUES clauses and unnested arrays in WITH queries in order to eliminate unnecessary joins. --- .../logictest/testdata/logic_test/apply_join | 2 +- pkg/sql/opt/memo/expr.go | 51 +++++ pkg/sql/opt/norm/rules/scalar.opt | 14 ++ pkg/sql/opt/norm/testdata/rules/with | 178 +++++++++++++++++- pkg/sql/opt/norm/with_funcs.go | 66 +++++++ pkg/sql/opt/ops/relational.opt | 5 + pkg/sql/opt/optbuilder/select.go | 22 ++- 7 files changed, 326 insertions(+), 12 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/apply_join b/pkg/sql/logictest/testdata/logic_test/apply_join index b9d3a27ad9d1..19cb6a9bc83a 100644 --- a/pkg/sql/logictest/testdata/logic_test/apply_join +++ b/pkg/sql/logictest/testdata/logic_test/apply_join @@ -529,7 +529,7 @@ FROM query error pq: st_mpointfromwkb\(\): error parsing EWKB: wkb: unknown byte order: 11000000 WITH - with_111870 (col_664924) AS (SELECT * FROM (VALUES (NULL)) AS tab_397795 (col_664924)) + with_111870 (col_664924) AS (SELECT * FROM (VALUES (true)) AS tab_397795 (col_664924)) SELECT cte_ref_33032.col_664924 AS col_664951 FROM diff --git a/pkg/sql/opt/memo/expr.go b/pkg/sql/opt/memo/expr.go index d3bf34201c62..b38b3cbd883a 100644 --- a/pkg/sql/opt/memo/expr.go +++ b/pkg/sql/opt/memo/expr.go @@ -1281,3 +1281,54 @@ func (g *GroupingPrivate) GroupingOrderType(required *props.OrderingChoice) Grou } return PartialStreaming } + +// IsConstantsAndPlaceholders returns true if all values in the list are +// constant, placeholders or tuples containing constants, placeholders or other +// such nested tuples. +func (v *ValuesExpr) IsConstantsAndPlaceholders() bool { + // Constant expressions should be leak-proof, so this is a quick test to + // determine if the ValuesExpr even needs to be checked. + if !v.Relational().VolatilitySet.IsLeakproof() { + return false + } + // Another quick check for something other than constants + if !v.Relational().OuterCols.Empty() { + return false + } + // A safety check + if len(v.Rows) == 0 { + return false + } + if !v.Rows.IsConstantsAndPlaceholders(v.Memo().EvalContext()) { + return false + } + return true +} + +// IsConstantsAndPlaceholders returns true if all scalar expressions in the list +// are constants, placeholders or tuples containing constants or placeholders. +// If a tuple nested within a tuple is found, false is returned. +func (e ScalarListExpr) IsConstantsAndPlaceholders(evalCtx *eval.Context) bool { + return e.isConstantsAndPlaceholders(evalCtx, false /* insideTuple */) +} + +func (e ScalarListExpr) isConstantsAndPlaceholders(evalCtx *eval.Context, insideTuple bool) bool { + for _, scalarExpr := range e { + if tupleExpr, ok := scalarExpr.(*TupleExpr); ok { + if insideTuple { + return false + } + if !tupleExpr.Elems.isConstantsAndPlaceholders(evalCtx, true) { + return false + } + } else { + if scalarExpr == nil { + panic(errors.AssertionFailedf("nil scalar expression in list")) + } + if !opt.IsConstValueOp(scalarExpr) && scalarExpr.Op() != opt.PlaceholderOp { + return false + } + } + } + return true +} diff --git a/pkg/sql/opt/norm/rules/scalar.opt b/pkg/sql/opt/norm/rules/scalar.opt index 92bf212f300f..504d9515a8d3 100644 --- a/pkg/sql/opt/norm/rules/scalar.opt +++ b/pkg/sql/opt/norm/rules/scalar.opt @@ -70,6 +70,20 @@ $item => $input +# InlineAnyWithScan matches on an ANY or NOT ANY expression which was generated +# from expressions like `column IN (WithScan)` or `column NOT IN (WithScan)`, +# where the WITH clause definition was normalized into a VALUES clause with +# constants or placeholders. The `WithScan` is replaced with the VALUES clause +# in order to avoid a join and can potentially enable a constrained index scan. +[InlineAnyWithScan, Normalize] +(Any + (WithScan $withScanPrivate:*) + $scalar:* & (CanInlineWithScan $withScanPrivate $scalar) + $anyPrivate:* +) +=> +(Any (InlineWithScan $withScanPrivate) $scalar $anyPrivate) + # NormalizeInConst ensures that the In operator's tuple operand is sorted with # duplicates removed (since duplicates do not change the result). [NormalizeInConst, Normalize] diff --git a/pkg/sql/opt/norm/testdata/rules/with b/pkg/sql/opt/norm/testdata/rules/with index af9706dcd177..f6b691b8b35d 100644 --- a/pkg/sql/opt/norm/testdata/rules/with +++ b/pkg/sql/opt/norm/testdata/rules/with @@ -173,7 +173,6 @@ with &2 (bar) ├── key: () └── fd: ()-->(5) -# We should inline foo, but not bar. norm expect=InlineWith WITH foo AS (SELECT 1), bar AS (SELECT 2) SELECT * FROM foo CROSS JOIN bar CROSS JOIN bar AS bar2 ---- @@ -876,3 +875,180 @@ scalar-group-by └── aggregations └── sum [as=sum:6, outer=(5)] └── n:5 + +# -------------------------------------------------- +# InlineAnyWithScan +# -------------------------------------------------- + +exec-ddl +CREATE TABLE t87790(i INT, j INT, k float, CONSTRAINT "primary" PRIMARY KEY (i,j)) +---- + +# An array unnested to VALUES should be inlined. +norm expect=InlineAnyWithScan +WITH ivals AS (SELECT * FROM unnest(ARRAY[1,2,3])) +SELECT * FROM t87790 WHERE i IN (SELECT * FROM ivals) +---- +select + ├── columns: i:2!null j:3!null k:4 + ├── key: (2,3) + ├── fd: (2,3)-->(4) + ├── scan t87790 + │ ├── columns: i:2!null j:3!null k:4 + │ ├── key: (2,3) + │ └── fd: (2,3)-->(4) + └── filters + └── i:2 IN (1, 2, 3) [outer=(2), constraints=(/2: [/1 - /1] [/2 - /2] [/3 - /3]; tight)] + +# An array unnested to VALUES in a MATERIALIZED WITH should not be inlined. +norm expect-not=InlineAnyWithScan +WITH ivals AS MATERIALIZED (SELECT * FROM unnest(ARRAY[1,2,3])) +SELECT * FROM t87790 WHERE i IN (SELECT * FROM ivals) +---- +with &1 (ivals) + ├── columns: i:2!null j:3!null k:4 + ├── materialized + ├── key: (2,3) + ├── fd: (2,3)-->(4) + ├── values + │ ├── columns: unnest:1!null + │ ├── cardinality: [3 - 3] + │ ├── (1,) + │ ├── (2,) + │ └── (3,) + └── semi-join (hash) + ├── columns: i:2!null j:3!null k:4 + ├── key: (2,3) + ├── fd: (2,3)-->(4) + ├── scan t87790 + │ ├── columns: i:2!null j:3!null k:4 + │ ├── key: (2,3) + │ └── fd: (2,3)-->(4) + ├── with-scan &1 (ivals) + │ ├── columns: unnest:7!null + │ ├── mapping: + │ │ └── unnest:1 => unnest:7 + │ └── cardinality: [3 - 3] + └── filters + └── i:2 = unnest:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] + +# Random (volatile) VALUES should not be inlined. +norm expect-not=InlineAnyWithScan +WITH randvals AS (SELECT random()) +SELECT * FROM t87790 WHERE k NOT IN (SELECT * FROM randvals) +---- +with &1 (randvals) + ├── columns: i:2!null j:3!null k:4 + ├── volatile + ├── key: (2,3) + ├── fd: (2,3)-->(4) + ├── values + │ ├── columns: random:1 + │ ├── cardinality: [1 - 1] + │ ├── volatile + │ ├── key: () + │ ├── fd: ()-->(1) + │ └── (random(),) + └── anti-join (cross) + ├── columns: i:2!null j:3!null k:4 + ├── key: (2,3) + ├── fd: (2,3)-->(4) + ├── scan t87790 + │ ├── columns: i:2!null j:3!null k:4 + │ ├── key: (2,3) + │ └── fd: (2,3)-->(4) + ├── with-scan &1 (randvals) + │ ├── columns: random:7 + │ ├── mapping: + │ │ └── random:1 => random:7 + │ ├── cardinality: [1 - 1] + │ ├── key: () + │ └── fd: ()-->(7) + └── filters + └── (k:4 = random:7) IS NOT false [outer=(4,7)] + +# A non-constant WITH expression cannot be inlined in the middle of +# normalization. +norm expect-not=InlineAnyWithScan +WITH ivals AS (SELECT * FROM unnest(ARRAY(SELECT i FROM t87790))) +SELECT * FROM t87790 WHERE i IN (SELECT * FROM ivals) +---- +semi-join (hash) + ├── columns: i:7!null j:8!null k:9 + ├── immutable + ├── key: (7,8) + ├── fd: (7,8)-->(9) + ├── scan t87790 + │ ├── columns: i:7!null j:8!null k:9 + │ ├── key: (7,8) + │ └── fd: (7,8)-->(9) + ├── project + │ ├── columns: unnest:12 + │ ├── immutable + │ ├── project-set + │ │ ├── columns: unnest:6 + │ │ ├── immutable + │ │ ├── values + │ │ │ ├── cardinality: [1 - 1] + │ │ │ ├── key: () + │ │ │ └── () + │ │ └── zip + │ │ └── function: unnest [immutable, subquery] + │ │ └── array-flatten + │ │ └── scan t87790 + │ │ └── columns: i:1!null + │ └── projections + │ └── unnest:6 [as=unnest:12, outer=(6)] + └── filters + └── i:7 = unnest:12 [outer=(7,12), constraints=(/7: (/NULL - ]; /12: (/NULL - ]), fd=(7)==(12), (12)==(7)] + +# Placeholders are considered constant for the InlineAnyWithScan normalization. +assign-placeholders-norm query-args=(1, 2, 3) +WITH ivals AS (SELECT * FROM unnest(ARRAY[$1::INT, $2::INT, $3::INT])) +SELECT * FROM t87790 WHERE i IN (SELECT * FROM ivals) +---- +select + ├── columns: i:2!null j:3!null k:4 + ├── key: (2,3) + ├── fd: (2,3)-->(4) + ├── scan t87790 + │ ├── columns: i:2!null j:3!null k:4 + │ ├── key: (2,3) + │ └── fd: (2,3)-->(4) + └── filters + └── i:2 IN (1, 2, 3) [outer=(2), constraints=(/2: [/1 - /1] [/2 - /2] [/3 - /3]; tight)] + +norm expect=InlineAnyWithScan +WITH v(a,b) AS (VALUES (1,2)) +SELECT 1 FROM t87790 WHERE i IN (SELECT a FROM v) +UNION ALL +SELECT 1 FROM t87790 WHERE j IN (SELECT b FROM v) +---- +union-all + ├── columns: "?column?":19!null + ├── left columns: "?column?":10 + ├── right columns: "?column?":18 + ├── project + │ ├── columns: "?column?":10!null + │ ├── fd: ()-->(10) + │ ├── select + │ │ ├── columns: i:3!null + │ │ ├── fd: ()-->(3) + │ │ ├── scan t87790 + │ │ │ └── columns: i:3!null + │ │ └── filters + │ │ └── i:3 = 1 [outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)] + │ └── projections + │ └── 1 [as="?column?":10] + └── project + ├── columns: "?column?":18!null + ├── fd: ()-->(18) + ├── select + │ ├── columns: j:12!null + │ ├── fd: ()-->(12) + │ ├── scan t87790 + │ │ └── columns: j:12!null + │ └── filters + │ └── j:12 = 2 [outer=(12), constraints=(/12: [/2 - /2]; tight), fd=()-->(12)] + └── projections + └── 1 [as="?column?":18] diff --git a/pkg/sql/opt/norm/with_funcs.go b/pkg/sql/opt/norm/with_funcs.go index 06e6b583df47..706e97fed759 100644 --- a/pkg/sql/opt/norm/with_funcs.go +++ b/pkg/sql/opt/norm/with_funcs.go @@ -13,6 +13,7 @@ package norm import ( "github.com/cockroachdb/cockroach/pkg/sql/opt" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" + "github.com/cockroachdb/errors" ) // CanInlineWith returns whether or not it's valid to inline binding in expr. @@ -61,3 +62,68 @@ func (c *CustomFuncs) InlineWith(binding, input memo.RelExpr, priv *memo.WithPri return replace(input).(memo.RelExpr) } + +// CanInlineWithScan returns whether or not it's valid and heuristically cheaper +// to inline a WithScanExpr with its bound expression from the memo. Currently +// this only allows inlining leak-proof constant VALUES clauses of the form +// `column IN (VALUES(...))` or `column NOT IN(VALUES(...))`, but could likely +// be extended to handle other expressions in the future. +func (c *CustomFuncs) CanInlineWithScan(private *memo.WithScanPrivate, scalar opt.ScalarExpr) bool { + if !private.CanInlineInPlace { + return false + } + // If we don't have `column IN(...)` or `column NOT IN(...)` or + // (col1, col2 ... coln) IN/NOT IN (...), it is not cheaper to inline because + // we wouldn't be avoiding one or more joins. + if tupleExpr, ok := scalar.(*memo.TupleExpr); ok { + for _, scalarExpr := range tupleExpr.Elems { + if scalarExpr.Op() != opt.VariableOp { + return false + } + } + } else if scalar.Op() != opt.VariableOp { + return false + } + expr := c.mem.Metadata().WithBinding(private.With) + var valuesExpr *memo.ValuesExpr + var ok bool + if valuesExpr, ok = expr.(*memo.ValuesExpr); !ok { + return false + } + if !valuesExpr.IsConstantsAndPlaceholders() { + return false + } + return true +} + +// InlineWithScan replaces a WithScanExpr with its bound expression, mapped to +// new output ColumnIDs. +func (c *CustomFuncs) InlineWithScan(private *memo.WithScanPrivate) memo.RelExpr { + expr := c.mem.Metadata().WithBinding(private.With) + var valuesExpr *memo.ValuesExpr + var ok bool + valuesExpr.Op() + if valuesExpr, ok = expr.(*memo.ValuesExpr); !ok { + // Didn't find the expected VALUES. + panic(errors.AssertionFailedf("attempt to inline a WithScan which is not a VALUES clause; operator: %s", + expr.Op().String())) + } + projections := make(memo.ProjectionsExpr, len(private.InCols)) + for i := range private.InCols { + projections[i] = c.f.ConstructProjectionsItem( + c.f.ConstructVariable(private.InCols[i]), + private.OutCols[i], + ) + } + // Shallow copy the values in case this WITH binding is inlined more than + // once. + newRows := make(memo.ScalarListExpr, len(valuesExpr.Rows)) + copy(newRows, valuesExpr.Rows) + newCols := make(opt.ColList, len(valuesExpr.ValuesPrivate.Cols)) + copy(newCols, valuesExpr.ValuesPrivate.Cols) + newValuesExpr := c.f.ConstructValues(newRows, &memo.ValuesPrivate{ + Cols: newCols, + ID: c.f.Metadata().NextUniqueID(), + }) + return c.f.ConstructProject(newValuesExpr, projections, opt.ColSet{}) +} diff --git a/pkg/sql/opt/ops/relational.opt b/pkg/sql/opt/ops/relational.opt index 2d1fc5f8a140..e8b6a85c3d1d 100644 --- a/pkg/sql/opt/ops/relational.opt +++ b/pkg/sql/opt/ops/relational.opt @@ -1313,6 +1313,11 @@ define WithScanPrivate { # most cases the column set is sufficient to do this, but various rules make # it possible to construct WithScan expressions with no columns. ID UniqueID + + # CanInlineInPlace is true when the WithScanExpr is allowed to be inlined + # when it is normalized as opposed to inlining which takes place when the + # WithExpr is normalized. + CanInlineInPlace bool } # RecursiveCTE implements the logic of a recursive CTE: diff --git a/pkg/sql/opt/optbuilder/select.go b/pkg/sql/opt/optbuilder/select.go index 252f24cd0571..3264d0df8754 100644 --- a/pkg/sql/opt/optbuilder/select.go +++ b/pkg/sql/opt/optbuilder/select.go @@ -99,11 +99,12 @@ func (b *Builder) buildDataSource( } outScope.expr = b.factory.ConstructWithScan(&memo.WithScanPrivate{ - With: cte.id, - Name: string(cte.name.Alias), - InCols: inCols, - OutCols: outCols, - ID: b.factory.Metadata().NextUniqueID(), + With: cte.id, + Name: string(cte.name.Alias), + InCols: inCols, + OutCols: outCols, + ID: b.factory.Metadata().NextUniqueID(), + CanInlineInPlace: !cte.mtr.Set || !cte.mtr.Materialize, }) return outScope @@ -205,11 +206,12 @@ func (b *Builder) buildDataSource( } outScope.expr = b.factory.ConstructWithScan(&memo.WithScanPrivate{ - With: cte.id, - Name: string(cte.name.Alias), - InCols: inCols, - OutCols: outCols, - ID: b.factory.Metadata().NextUniqueID(), + With: cte.id, + Name: string(cte.name.Alias), + InCols: inCols, + OutCols: outCols, + ID: b.factory.Metadata().NextUniqueID(), + CanInlineInPlace: !cte.mtr.Set || !cte.mtr.Materialize, }) return outScope