Skip to content

Commit

Permalink
opt: hoist uncorrelated equality subqueries
Browse files Browse the repository at this point in the history
Subqueries that are in equality expressions with a variable are now
hoisted. When these expressions exist in a filter, hoisting the subquery
can allow the main query to plan a lookup join, rather than an
inefficient full-table scan.

For example, consider the table and query:

    CREATE TABLE t (
      a INT,
      INDEX (a)
    );

    SELECT * FROM t WHERE a = (SELECT max(a) FROM t);

Prior to this commit, the query plan for this query required a full
table scan:

    select
     ├── columns: a:1
     ├── scan t@t_a_idx
     │    ├── columns: a:1
     │    └── constraint: /1/2: (/NULL - ]
     └── filters
          └── eq
               ├── a:1
               └── subquery
                    └── scalar-group-by
                         ├── columns: max:9
                         ├── scan t@t_a_idx,rev
                         │    ├── columns: a:5
                         │    ├── constraint: /5/6: (/NULL - ]
                         │    └── limit: 1(rev)
                         └── aggregations
                              └── const-agg [as=max:9, outer=(5)]
                                   └── a:5

By hoisting the subquery, the full table scan is replaced with a lookup
join:

    project
     ├── columns: a:1
     └── inner-join (lookup t@t_a_idx)
          ├── columns: a:1 max:9
          ├── key columns: [9] = [1]
          ├── scalar-group-by
          │    ├── columns: max:9
          │    ├── scan t@t_a_idx,rev
          │    │    ├── columns: a:5
          │    │    ├── constraint: /5/6: (/NULL - ]
          │    │    └── limit: 1(rev)
          │    └── aggregations
          │         └── const-agg [as=max:9, outer=(5)]
          │              └── a:5
          └── filters (true)

This hoisting is disabled by default, but can be enabled by setting the
`optimizer_hoist_uncorrelated_equality_subqueries` session setting to
`true`.

Fixes cockroachdb#83392
Informs cockroachdb#51820
Informs cockroachdb#93829
Informs cockroachdb#100855

Release note (performance improvement): Queries that have subqueries in
equality expressions are now more efficiently planned by the optimizer when
`optimizer_hoist_uncorrelated_equality_subqueries` is set to `true`.
  • Loading branch information
mgartner committed Apr 18, 2023
1 parent c7f7106 commit f5bed07
Show file tree
Hide file tree
Showing 10 changed files with 295 additions and 31 deletions.
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3513,6 +3513,10 @@ func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) {
m.data.OptimizerAlwaysUseHistograms = val
}

func (m *sessionDataMutator) SetOptimizerHoistUncorrelatedEqualitySubqueries(val bool) {
m.data.OptimizerHoistUncorrelatedEqualitySubqueries = val
}

func (m *sessionDataMutator) SetEnableCreateStatsUsingExtremes(val bool) {
m.data.EnableCreateStatsUsingExtremes = val
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -5272,6 +5272,7 @@ on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer on
optimizer_always_use_histograms on
optimizer_hoist_uncorrelated_equality_subqueries off
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2759,6 +2759,7 @@ null_ordered_last off NULL
on_update_rehome_row_enabled on NULL NULL NULL string
opt_split_scan_limit 2048 NULL NULL NULL string
optimizer_always_use_histograms on NULL NULL NULL string
optimizer_hoist_uncorrelated_equality_subqueries off NULL NULL NULL string
optimizer_use_forecasts on NULL NULL NULL string
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_improved_disjunction_stats on NULL NULL NULL string
Expand Down Expand Up @@ -2912,6 +2913,7 @@ null_ordered_last off NULL
on_update_rehome_row_enabled on NULL user NULL on on
opt_split_scan_limit 2048 NULL user NULL 2048 2048
optimizer_always_use_histograms on NULL user NULL on on
optimizer_hoist_uncorrelated_equality_subqueries off NULL user NULL on on
optimizer_use_forecasts on NULL user NULL on on
optimizer_use_histograms on NULL user NULL on on
optimizer_use_improved_disjunction_stats on NULL user NULL on on
Expand Down Expand Up @@ -3065,6 +3067,7 @@ on_update_rehome_row_enabled NULL NULL NULL
opt_split_scan_limit NULL NULL NULL NULL NULL
optimizer NULL NULL NULL NULL NULL
optimizer_always_use_histograms NULL NULL NULL NULL NULL
optimizer_hoist_uncorrelated_equality_subqueries NULL NULL NULL NULL NULL
optimizer_use_forecasts NULL NULL NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer_always_use_histograms on
optimizer_hoist_uncorrelated_equality_subqueries off
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ type Memo struct {
useLimitOrderingForStreamingGroupBy bool
useImprovedSplitDisjunctionForJoins bool
alwaysUseHistograms bool
hoistUncorrelatedEqualitySubqueries bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -221,6 +222,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy,
useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins,
alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms,
hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries,
}
m.metadata.Init()
m.logPropsBuilder.init(ctx, evalCtx, m)
Expand Down Expand Up @@ -362,7 +364,8 @@ func (m *Memo) IsStale(
m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats ||
m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy ||
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins ||
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms {
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms ||
m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().OptimizerAlwaysUseHistograms = false
notStale()

// Stale optimizer_hoist_uncorrelated_equality_subqueries.
evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = true
stale()
evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = false
notStale()

// Stale data sources and schema. Create new catalog so that data sources are
// recreated and can be modified independently.
catalog = testcat.New()
Expand Down
23 changes: 22 additions & 1 deletion pkg/sql/opt/norm/decorrelate_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,20 @@ func (c *CustomFuncs) deriveHasHoistableSubquery(scalar opt.ScalarExpr) bool {
case *memo.UDFExpr:
// Do not attempt to hoist UDFs.
return false

case *memo.EqExpr:
// Hoist subqueries in expressions like (Eq (Variable) (Subquery)) if
// the corresponding session setting is enabled.
// TODO(mgartner): We could hoist if we have an IS NOT DISTINCT FROM
// expression. But it won't currently lead to a lookup join due to
// #100855 and the plan could be worse, so we avoid it for now.
if c.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries {
_, isLeftVar := scalar.Child(0).(*memo.VariableExpr)
_, isRightSubquery := scalar.Child(1).(*memo.SubqueryExpr)
if isLeftVar && isRightSubquery {
return true
}
}
}

// If HasHoistableSubquery is true for any child, then it's true for this
Expand Down Expand Up @@ -807,7 +821,14 @@ func (r *subqueryHoister) hoistAll(scalar opt.ScalarExpr) opt.ScalarExpr {
switch scalar.Op() {
case opt.SubqueryOp, opt.ExistsOp, opt.AnyOp, opt.ArrayFlattenOp:
subquery := scalar.Child(0).(memo.RelExpr)
if subquery.Relational().OuterCols.Empty() {
// According to the implementation of deriveHasHoistableSubquery,
// Exists, Any, and ArrayFlatten expressions are only hoistable if they
// are correlated. Uncorrelated subquery expressions are hoistable if
// the corresponding session setting is enabled and they are part of an
// equality expression with a variable.
uncorrelatedHoistAllowed := scalar.Op() == opt.SubqueryOp &&
r.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries
if subquery.Relational().OuterCols.Empty() && !uncorrelatedHoistAllowed {
break
}

Expand Down
Loading

0 comments on commit f5bed07

Please sign in to comment.