diff --git a/pkg/sql/opt/metadata.go b/pkg/sql/opt/metadata.go index e7c0e82a0c57..744d5c94379e 100644 --- a/pkg/sql/opt/metadata.go +++ b/pkg/sql/opt/metadata.go @@ -115,6 +115,10 @@ type Metadata struct { // mutation operators, used to determine the logical properties of WithScan. withBindings map[WithID]Expr + // hoistedUncorrelatedSubqueries is used to track uncorrelated subqueries + // that have been hoisted. + hoistedUncorrelatedSubqueries map[Expr]struct{} + // dataSourceDeps stores each data source object that the query depends on. dataSourceDeps map[cat.StableID]cat.DataSource @@ -1062,6 +1066,24 @@ func (md *Metadata) ForEachWithBinding(fn func(WithID, Expr)) { } } +// AddHoistedUncorrelatedSubquery marks the given uncorrelated subquery +// expression as hoisted. It is used to prevent hoisting the same uncorrelated +// subquery twice because that may cause two children of an expression to have +// intersecting columns (see #114703). +func (md *Metadata) AddHoistedUncorrelatedSubquery(subquery Expr) { + if md.hoistedUncorrelatedSubqueries == nil { + md.hoistedUncorrelatedSubqueries = make(map[Expr]struct{}) + } + md.hoistedUncorrelatedSubqueries[subquery] = struct{}{} +} + +// IsHoistedUncorrelatedSubquery returns true if the given subquery was +// previously marked as hoisted with AddHoistedUncorrelatedSubquery. +func (md *Metadata) IsHoistedUncorrelatedSubquery(subquery Expr) bool { + _, ok := md.hoistedUncorrelatedSubqueries[subquery] + return ok +} + // TestingDataSourceDeps exposes the dataSourceDeps for testing. func (md *Metadata) TestingDataSourceDeps() map[cat.StableID]cat.DataSource { return md.dataSourceDeps diff --git a/pkg/sql/opt/norm/decorrelate_funcs.go b/pkg/sql/opt/norm/decorrelate_funcs.go index 45fdf5fc53ab..d8169ef5ff4c 100644 --- a/pkg/sql/opt/norm/decorrelate_funcs.go +++ b/pkg/sql/opt/norm/decorrelate_funcs.go @@ -68,15 +68,22 @@ func (c *CustomFuncs) deriveHasHoistableSubquery(scalar opt.ScalarExpr) bool { return false case *memo.EqExpr: - // Hoist subqueries in expressions like (Eq (Variable) (Subquery)) if - // the corresponding session setting is enabled. + // Hoist subqueries in expressions like (Eq (Variable) (Subquery)) if: + // + // 1. The corresponding session setting is enabled. + // 2. And, the subquery has not already been hoisted elsewhere in the + // expression tree. Hoisting the same subquery twice could result + // in query plans where two children of an expression have + // intersecting columns (see #114703). + // // TODO(mgartner): We could hoist if we have an IS NOT DISTINCT FROM // expression. But it won't currently lead to a lookup join due to // #100855 and the plan could be worse, so we avoid it for now. if c.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries { _, isLeftVar := scalar.Child(0).(*memo.VariableExpr) - _, isRightSubquery := scalar.Child(1).(*memo.SubqueryExpr) - if isLeftVar && isRightSubquery { + subquery, isRightSubquery := scalar.Child(1).(*memo.SubqueryExpr) + if isLeftVar && isRightSubquery && + !c.f.Metadata().IsHoistedUncorrelatedSubquery(subquery) { return true } } @@ -824,12 +831,18 @@ func (r *subqueryHoister) hoistAll(scalar opt.ScalarExpr) opt.ScalarExpr { // According to the implementation of deriveHasHoistableSubquery, // Exists, Any, and ArrayFlatten expressions are only hoistable if they // are correlated. Uncorrelated subquery expressions are hoistable if - // the corresponding session setting is enabled and they are part of an - // equality expression with a variable. - uncorrelatedHoistAllowed := scalar.Op() == opt.SubqueryOp && - r.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries - if subquery.Relational().OuterCols.Empty() && !uncorrelatedHoistAllowed { - break + // the corresponding session setting is enabled, they are part of an + // equality expression with a variable, and they have not already been + // hoisted elsewhere in the expression tree. + if subquery.Relational().OuterCols.Empty() { + uncorrelatedHoistAllowed := scalar.Op() == opt.SubqueryOp && + r.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries && + !r.f.Metadata().IsHoistedUncorrelatedSubquery(scalar) + if !uncorrelatedHoistAllowed { + break + } + // Mark the subquery as being hoisted. + r.f.Metadata().AddHoistedUncorrelatedSubquery(scalar) } switch t := scalar.(type) { diff --git a/pkg/sql/opt/norm/testdata/rules/decorrelate b/pkg/sql/opt/norm/testdata/rules/decorrelate index 2114a367e644..2fe2cad8fcce 100644 --- a/pkg/sql/opt/norm/testdata/rules/decorrelate +++ b/pkg/sql/opt/norm/testdata/rules/decorrelate @@ -4847,6 +4847,177 @@ select └── max [as=max:12, outer=(8)] └── x:8 +# The subquery should only be hoisted once to avoid creating an expression with +# children that have intersecting columns. See #114703. +norm expect=HoistSelectSubquery +SELECT NULL +FROM a AS t1 +JOIN a AS t2 ON t1.i = t2.i +WHERE t2.i = (SELECT 0 FROM a) +---- +project + ├── columns: "?column?":23 + ├── fd: ()-->(23) + ├── inner-join (hash) + │ ├── columns: t1.i:2!null t2.i:9!null "?column?":22!null + │ ├── fd: ()-->(2,9,22), (2)==(9,22), (22)==(2,9), (9)==(2,22) + │ ├── inner-join (hash) + │ │ ├── columns: t1.i:2!null "?column?":22!null + │ │ ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more) + │ │ ├── fd: ()-->(2,22), (2)==(22), (22)==(2) + │ │ ├── scan a [as=t1] + │ │ │ └── columns: t1.i:2 + │ │ ├── max1-row + │ │ │ ├── columns: "?column?":22!null + │ │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ │ ├── cardinality: [0 - 1] + │ │ │ ├── key: () + │ │ │ ├── fd: ()-->(22) + │ │ │ └── project + │ │ │ ├── columns: "?column?":22!null + │ │ │ ├── fd: ()-->(22) + │ │ │ ├── scan a + │ │ │ └── projections + │ │ │ └── 0 [as="?column?":22] + │ │ └── filters + │ │ └── t1.i:2 = "?column?":22 [outer=(2,22), constraints=(/2: (/NULL - ]; /22: (/NULL - ]), fd=(2)==(22), (22)==(2)] + │ ├── select + │ │ ├── columns: t2.i:9!null + │ │ ├── scan a [as=t2] + │ │ │ └── columns: t2.i:9 + │ │ └── filters + │ │ └── eq [outer=(9), subquery, constraints=(/9: (/NULL - ])] + │ │ ├── t2.i:9 + │ │ └── subquery + │ │ └── max1-row + │ │ ├── columns: "?column?":22!null + │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ ├── cardinality: [0 - 1] + │ │ ├── key: () + │ │ ├── fd: ()-->(22) + │ │ └── project + │ │ ├── columns: "?column?":22!null + │ │ ├── fd: ()-->(22) + │ │ ├── scan a + │ │ └── projections + │ │ └── 0 [as="?column?":22] + │ └── filters + │ └── t1.i:2 = t2.i:9 [outer=(2,9), constraints=(/2: (/NULL - ]; /9: (/NULL - ]), fd=(2)==(9), (9)==(2)] + └── projections + └── NULL [as="?column?":23] + +# Each subquery should only be hoisted once. +norm +SELECT NULL +FROM a AS t1 +JOIN a AS t2 ON t1.i = t2.i +WHERE t2.i = (SELECT 0 FROM a) AND t1.i = (SELECT 0 FROM a) +---- +project + ├── columns: "?column?":31 + ├── fd: ()-->(31) + ├── inner-join (hash) + │ ├── columns: t1.i:2!null t2.i:9!null "?column?":22!null "?column?":30!null + │ ├── fd: ()-->(2,9,22,30), (2)==(9,22,30), (30)==(2,9,22), (22)==(2,9,30), (9)==(2,22,30) + │ ├── inner-join (hash) + │ │ ├── columns: t1.i:2!null "?column?":22!null "?column?":30!null + │ │ ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more) + │ │ ├── fd: ()-->(2,22,30), (2)==(22,30), (30)==(2,22), (22)==(2,30) + │ │ ├── inner-join (hash) + │ │ │ ├── columns: t1.i:2!null "?column?":30!null + │ │ │ ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more) + │ │ │ ├── fd: ()-->(2,30), (2)==(30), (30)==(2) + │ │ │ ├── scan a [as=t1] + │ │ │ │ └── columns: t1.i:2 + │ │ │ ├── max1-row + │ │ │ │ ├── columns: "?column?":30!null + │ │ │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ ├── key: () + │ │ │ │ ├── fd: ()-->(30) + │ │ │ │ └── project + │ │ │ │ ├── columns: "?column?":30!null + │ │ │ │ ├── fd: ()-->(30) + │ │ │ │ ├── scan a + │ │ │ │ └── projections + │ │ │ │ └── 0 [as="?column?":30] + │ │ │ └── filters + │ │ │ └── t1.i:2 = "?column?":30 [outer=(2,30), constraints=(/2: (/NULL - ]; /30: (/NULL - ]), fd=(2)==(30), (30)==(2)] + │ │ ├── select + │ │ │ ├── columns: "?column?":22!null + │ │ │ ├── cardinality: [0 - 1] + │ │ │ ├── key: () + │ │ │ ├── fd: ()-->(22) + │ │ │ ├── max1-row + │ │ │ │ ├── columns: "?column?":22!null + │ │ │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ ├── key: () + │ │ │ │ ├── fd: ()-->(22) + │ │ │ │ └── project + │ │ │ │ ├── columns: "?column?":22!null + │ │ │ │ ├── fd: ()-->(22) + │ │ │ │ ├── scan a + │ │ │ │ └── projections + │ │ │ │ └── 0 [as="?column?":22] + │ │ │ └── filters + │ │ │ └── eq [outer=(22), subquery, constraints=(/22: (/NULL - ])] + │ │ │ ├── "?column?":22 + │ │ │ └── subquery + │ │ │ └── max1-row + │ │ │ ├── columns: "?column?":30!null + │ │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ │ ├── cardinality: [0 - 1] + │ │ │ ├── key: () + │ │ │ ├── fd: ()-->(30) + │ │ │ └── project + │ │ │ ├── columns: "?column?":30!null + │ │ │ ├── fd: ()-->(30) + │ │ │ ├── scan a + │ │ │ └── projections + │ │ │ └── 0 [as="?column?":30] + │ │ └── filters + │ │ └── t1.i:2 = "?column?":22 [outer=(2,22), constraints=(/2: (/NULL - ]; /22: (/NULL - ]), fd=(2)==(22), (22)==(2)] + │ ├── select + │ │ ├── columns: t2.i:9!null + │ │ ├── scan a [as=t2] + │ │ │ └── columns: t2.i:9 + │ │ └── filters + │ │ ├── eq [outer=(9), subquery, constraints=(/9: (/NULL - ])] + │ │ │ ├── t2.i:9 + │ │ │ └── subquery + │ │ │ └── max1-row + │ │ │ ├── columns: "?column?":22!null + │ │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ │ ├── cardinality: [0 - 1] + │ │ │ ├── key: () + │ │ │ ├── fd: ()-->(22) + │ │ │ └── project + │ │ │ ├── columns: "?column?":22!null + │ │ │ ├── fd: ()-->(22) + │ │ │ ├── scan a + │ │ │ └── projections + │ │ │ └── 0 [as="?column?":22] + │ │ └── eq [outer=(9), subquery, constraints=(/9: (/NULL - ])] + │ │ ├── t2.i:9 + │ │ └── subquery + │ │ └── max1-row + │ │ ├── columns: "?column?":30!null + │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ ├── cardinality: [0 - 1] + │ │ ├── key: () + │ │ ├── fd: ()-->(30) + │ │ └── project + │ │ ├── columns: "?column?":30!null + │ │ ├── fd: ()-->(30) + │ │ ├── scan a + │ │ └── projections + │ │ └── 0 [as="?column?":30] + │ └── filters + │ └── t1.i:2 = t2.i:9 [outer=(2,9), constraints=(/2: (/NULL - ]; /9: (/NULL - ]), fd=(2)==(9), (9)==(2)] + └── projections + └── NULL [as="?column?":31] + # -------------------------------------------------- # HoistProjectSubquery