Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-23.1: opt: hoist uncorrelated equality subqueries #101753

Merged
merged 1 commit into from
Apr 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3517,6 +3517,10 @@ func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) {
m.data.OptimizerAlwaysUseHistograms = val
}

func (m *sessionDataMutator) SetOptimizerHoistUncorrelatedEqualitySubqueries(val bool) {
m.data.OptimizerHoistUncorrelatedEqualitySubqueries = val
}

func (m *sessionDataMutator) SetEnableCreateStatsUsingExtremes(val bool) {
m.data.EnableCreateStatsUsingExtremes = val
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -5281,6 +5281,7 @@ on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer on
optimizer_always_use_histograms on
optimizer_hoist_uncorrelated_equality_subqueries off
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2761,6 +2761,7 @@ null_ordered_last off NULL
on_update_rehome_row_enabled on NULL NULL NULL string
opt_split_scan_limit 2048 NULL NULL NULL string
optimizer_always_use_histograms on NULL NULL NULL string
optimizer_hoist_uncorrelated_equality_subqueries off NULL NULL NULL string
optimizer_use_forecasts on NULL NULL NULL string
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_improved_disjunction_stats on NULL NULL NULL string
Expand Down Expand Up @@ -2915,6 +2916,7 @@ null_ordered_last off NULL
on_update_rehome_row_enabled on NULL user NULL on on
opt_split_scan_limit 2048 NULL user NULL 2048 2048
optimizer_always_use_histograms on NULL user NULL on on
optimizer_hoist_uncorrelated_equality_subqueries off NULL user NULL off off
optimizer_use_forecasts on NULL user NULL on on
optimizer_use_histograms on NULL user NULL on on
optimizer_use_improved_disjunction_stats on NULL user NULL on on
Expand Down Expand Up @@ -3069,6 +3071,7 @@ on_update_rehome_row_enabled NULL NULL NULL
opt_split_scan_limit NULL NULL NULL NULL NULL
optimizer NULL NULL NULL NULL NULL
optimizer_always_use_histograms NULL NULL NULL NULL NULL
optimizer_hoist_uncorrelated_equality_subqueries NULL NULL NULL NULL NULL
optimizer_use_forecasts NULL NULL NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer_always_use_histograms on
optimizer_hoist_uncorrelated_equality_subqueries off
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ type Memo struct {
useLimitOrderingForStreamingGroupBy bool
useImprovedSplitDisjunctionForJoins bool
alwaysUseHistograms bool
hoistUncorrelatedEqualitySubqueries bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -221,6 +222,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy,
useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins,
alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms,
hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries,
}
m.metadata.Init()
m.logPropsBuilder.init(ctx, evalCtx, m)
Expand Down Expand Up @@ -362,7 +364,8 @@ func (m *Memo) IsStale(
m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats ||
m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy ||
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins ||
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms {
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms ||
m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().OptimizerAlwaysUseHistograms = false
notStale()

// Stale optimizer_hoist_uncorrelated_equality_subqueries.
evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = true
stale()
evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = false
notStale()

// Stale data sources and schema. Create new catalog so that data sources are
// recreated and can be modified independently.
catalog = testcat.New()
Expand Down
23 changes: 22 additions & 1 deletion pkg/sql/opt/norm/decorrelate_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,20 @@ func (c *CustomFuncs) deriveHasHoistableSubquery(scalar opt.ScalarExpr) bool {
case *memo.UDFExpr:
// Do not attempt to hoist UDFs.
return false

case *memo.EqExpr:
// Hoist subqueries in expressions like (Eq (Variable) (Subquery)) if
// the corresponding session setting is enabled.
// TODO(mgartner): We could hoist if we have an IS NOT DISTINCT FROM
// expression. But it won't currently lead to a lookup join due to
// #100855 and the plan could be worse, so we avoid it for now.
if c.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries {
_, isLeftVar := scalar.Child(0).(*memo.VariableExpr)
_, isRightSubquery := scalar.Child(1).(*memo.SubqueryExpr)
if isLeftVar && isRightSubquery {
return true
}
}
}

// If HasHoistableSubquery is true for any child, then it's true for this
Expand Down Expand Up @@ -807,7 +821,14 @@ func (r *subqueryHoister) hoistAll(scalar opt.ScalarExpr) opt.ScalarExpr {
switch scalar.Op() {
case opt.SubqueryOp, opt.ExistsOp, opt.AnyOp, opt.ArrayFlattenOp:
subquery := scalar.Child(0).(memo.RelExpr)
if subquery.Relational().OuterCols.Empty() {
// According to the implementation of deriveHasHoistableSubquery,
// Exists, Any, and ArrayFlatten expressions are only hoistable if they
// are correlated. Uncorrelated subquery expressions are hoistable if
// the corresponding session setting is enabled and they are part of an
// equality expression with a variable.
uncorrelatedHoistAllowed := scalar.Op() == opt.SubqueryOp &&
r.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries
if subquery.Relational().OuterCols.Empty() && !uncorrelatedHoistAllowed {
break
}

Expand Down
236 changes: 227 additions & 9 deletions pkg/sql/opt/norm/testdata/rules/decorrelate
Original file line number Diff line number Diff line change
Expand Up @@ -4715,6 +4715,129 @@ project
│ └── false
└── true_agg:18 IS NOT NULL

# Hoist an uncorrelated equality subquery.
norm expect=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on
SELECT * FROM a WHERE k = (SELECT max(x) FROM xy)
----
project
├── columns: k:1!null i:2 f:3 s:4 j:5
├── cardinality: [0 - 1]
├── key: ()
├── fd: ()-->(1-5)
└── inner-join (hash)
├── columns: k:1!null i:2 f:3 s:4 j:5 max:12!null
├── cardinality: [0 - 1]
├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one)
├── key: ()
├── fd: ()-->(1-5,12), (12)==(1), (1)==(12)
├── scan a
│ ├── columns: k:1!null i:2 f:3 s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
├── scalar-group-by
│ ├── columns: max:12
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ ├── fd: ()-->(12)
│ ├── scan xy
│ │ ├── columns: x:8!null
│ │ └── key: (8)
│ └── aggregations
│ └── max [as=max:12, outer=(8)]
│ └── x:8
└── filters
└── k:1 = max:12 [outer=(1,12), constraints=(/1: (/NULL - ]; /12: (/NULL - ]), fd=(1)==(12), (12)==(1)]

# Hoist an uncorrelated equality subquery that could return multiple rows.
norm expect=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on
SELECT * FROM a WHERE k = (SELECT x FROM xy)
----
project
├── columns: k:1!null i:2 f:3 s:4 j:5
├── cardinality: [0 - 1]
├── key: ()
├── fd: ()-->(1-5)
└── inner-join (hash)
├── columns: k:1!null i:2 f:3 s:4 j:5 x:8!null
├── cardinality: [0 - 1]
├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one)
├── key: ()
├── fd: ()-->(1-5,8), (8)==(1), (1)==(8)
├── scan a
│ ├── columns: k:1!null i:2 f:3 s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
├── max1-row
│ ├── columns: x:8!null
│ ├── error: "more than one row returned by a subquery used as an expression"
│ ├── cardinality: [0 - 1]
│ ├── key: ()
│ ├── fd: ()-->(8)
│ └── scan xy
│ ├── columns: x:8!null
│ └── key: (8)
└── filters
└── k:1 = x:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)]

# Do not hoist an uncorrelated equality subquery if the corresponding session
# setting is disabled.
norm expect-not=HoistSelectSubquery
SELECT * FROM a WHERE k = (SELECT max(x) FROM xy)
----
select
├── columns: k:1!null i:2 f:3 s:4 j:5
├── key: (1)
├── fd: (1)-->(2-5)
├── scan a
│ ├── columns: k:1!null i:2 f:3 s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
└── filters
└── eq [outer=(1), subquery, constraints=(/1: (/NULL - ])]
├── k:1
└── subquery
└── scalar-group-by
├── columns: max:12
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(12)
├── scan xy
│ ├── columns: x:8!null
│ └── key: (8)
└── aggregations
└── max [as=max:12, outer=(8)]
└── x:8

# Do not hoist an uncorrelated inequality subquery. We have not yet proven that
# it will lead to a better plan.
norm expect-not=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on
SELECT * FROM a WHERE k < (SELECT max(x) FROM xy)
----
select
├── columns: k:1!null i:2 f:3 s:4 j:5
├── key: (1)
├── fd: (1)-->(2-5)
├── scan a
│ ├── columns: k:1!null i:2 f:3 s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
└── filters
└── lt [outer=(1), subquery, constraints=(/1: (/NULL - ])]
├── k:1
└── subquery
└── scalar-group-by
├── columns: max:12
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(12)
├── scan xy
│ ├── columns: x:8!null
│ └── key: (8)
└── aggregations
└── max [as=max:12, outer=(8)]
└── x:8


# --------------------------------------------------
# HoistProjectSubquery
# --------------------------------------------------
Expand Down Expand Up @@ -5119,19 +5242,114 @@ values
│ └── true [as=column16:16]
└── false

# Don't hoist uncorrelated subquery.
norm
SELECT i < ANY(SELECT y FROM xy) AS r FROM a
# Hoist an uncorrelated equality subquery.
norm expect=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on
SELECT k = (SELECT max(x) FROM xy) FROM a
----
project
├── columns: r:12
├── columns: "?column?":13
├── inner-join (cross)
│ ├── columns: k:1!null max:12
│ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more)
│ ├── key: (1)
│ ├── fd: ()-->(12)
│ ├── scan a
│ │ ├── columns: k:1!null
│ │ └── key: (1)
│ ├── scalar-group-by
│ │ ├── columns: max:12
│ │ ├── cardinality: [1 - 1]
│ │ ├── key: ()
│ │ ├── fd: ()-->(12)
│ │ ├── scan xy
│ │ │ ├── columns: x:8!null
│ │ │ └── key: (8)
│ │ └── aggregations
│ │ └── max [as=max:12, outer=(8)]
│ │ └── x:8
│ └── filters (true)
└── projections
└── k:1 = max:12 [as="?column?":13, outer=(1,12)]

# Hoist an uncorrelated equality subquery that could return multiple rows.
norm expect=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on
SELECT k = (SELECT x FROM xy) FROM a
----
project
├── columns: "?column?":12
├── left-join (cross)
│ ├── columns: k:1!null x:8
│ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more)
│ ├── key: (1)
│ ├── fd: (1)-->(8)
│ ├── scan a
│ │ ├── columns: k:1!null
│ │ └── key: (1)
│ ├── max1-row
│ │ ├── columns: x:8!null
│ │ ├── error: "more than one row returned by a subquery used as an expression"
│ │ ├── cardinality: [0 - 1]
│ │ ├── key: ()
│ │ ├── fd: ()-->(8)
│ │ └── scan xy
│ │ ├── columns: x:8!null
│ │ └── key: (8)
│ └── filters (true)
└── projections
└── k:1 = x:8 [as="?column?":12, outer=(1,8)]

# Do not hoist an uncorrelated equality subquery if the corresponding session
# setting is disabled.
norm expect-not=HoistProjectSubquery
SELECT k = (SELECT max(x) FROM xy) FROM a
----
project
├── columns: "?column?":13
├── scan a
│ └── columns: i:2
│ ├── columns: k:1!null
│ └── key: (1)
└── projections
└── eq [as="?column?":13, outer=(1), subquery]
├── k:1
└── subquery
└── scalar-group-by
├── columns: max:12
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(12)
├── scan xy
│ ├── columns: x:8!null
│ └── key: (8)
└── aggregations
└── max [as=max:12, outer=(8)]
└── x:8

# Do not hoist an uncorrelated inequality subquery. We have not yet proven that
# it will lead to a better plan.
norm expect-not=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on
SELECT k < (SELECT max(x) FROM xy) FROM a
----
project
├── columns: "?column?":13
├── scan a
│ ├── columns: k:1!null
│ └── key: (1)
└── projections
└── any: lt [as=r:12, outer=(2), correlated-subquery]
├── scan xy
│ └── columns: y:9
└── i:2
└── lt [as="?column?":13, outer=(1), subquery]
├── k:1
└── subquery
└── scalar-group-by
├── columns: max:12
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(12)
├── scan xy
│ ├── columns: x:8!null
│ └── key: (8)
└── aggregations
└── max [as=max:12, outer=(8)]
└── x:8


# --------------------------------------------------
# HoistJoinSubquery
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/sessiondatapb/local_only_session_data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,11 @@ message LocalOnlySessionData {
// parallelization will still be disabled for queries with LIMITs, and it can
// lead to increased likelihood of OOMs.
bool unbounded_parallel_scans = 101;
// OptimizerHoistUncorrelatedEqualitySubqueries, when true, causes the
// optimizer to hoist uncorrelated subqueries that are part of an equality
// expression with a column reference, which can produce more efficient query
// plans.
bool optimizer_hoist_uncorrelated_equality_subqueries = 102;

///////////////////////////////////////////////////////////////////////////
// WARNING: consider whether a session parameter you're adding needs to //
Expand Down
Loading