diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 297ed87a97cb..2c5d7671caa3 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3394,6 +3394,10 @@ func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) { m.data.OptimizerAlwaysUseHistograms = val } +func (m *sessionDataMutator) SetOptimizerHoistUncorrelatedEqualitySubqueries(val bool) { + m.data.OptimizerHoistUncorrelatedEqualitySubqueries = val +} + func (m *sessionDataMutator) SetPreparedStatementsCacheSize(val int64) { m.data.PreparedStatementsCacheSize = val } diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index 46697aee9367..28c06a9154c3 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -4785,6 +4785,7 @@ on_update_rehome_row_enabled on opt_split_scan_limit 2048 optimizer on optimizer_always_use_histograms off +optimizer_hoist_uncorrelated_equality_subqueries off optimizer_use_forecasts on optimizer_use_histograms on optimizer_use_improved_disjunction_stats off diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 971030d54433..c4376944d987 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -2807,6 +2807,7 @@ null_ordered_last off NULL on_update_rehome_row_enabled on NULL NULL NULL string opt_split_scan_limit 2048 NULL NULL NULL string optimizer_always_use_histograms off NULL NULL NULL string +optimizer_hoist_uncorrelated_equality_subqueries off NULL NULL NULL string optimizer_use_forecasts on NULL NULL NULL string optimizer_use_histograms on NULL NULL NULL string optimizer_use_improved_disjunction_stats off NULL NULL NULL string @@ -2952,6 +2953,7 @@ null_ordered_last off NULL on_update_rehome_row_enabled on NULL user NULL on on opt_split_scan_limit 2048 NULL user NULL 2048 2048 optimizer_always_use_histograms off NULL user NULL off off +optimizer_hoist_uncorrelated_equality_subqueries off NULL user NULL off off optimizer_use_forecasts on NULL user NULL on on optimizer_use_histograms on NULL user NULL on on optimizer_use_improved_disjunction_stats off NULL user NULL off off @@ -3095,6 +3097,7 @@ on_update_rehome_row_enabled NULL NULL NULL opt_split_scan_limit NULL NULL NULL NULL NULL optimizer NULL NULL NULL NULL NULL optimizer_always_use_histograms NULL NULL NULL NULL NULL +optimizer_hoist_uncorrelated_equality_subqueries NULL NULL NULL NULL NULL optimizer_use_forecasts NULL NULL NULL NULL NULL optimizer_use_histograms NULL NULL NULL NULL NULL optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 8f0b5d9d4ef8..63508d6d63f4 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -106,6 +106,7 @@ null_ordered_last off on_update_rehome_row_enabled on opt_split_scan_limit 2048 optimizer_always_use_histograms off +optimizer_hoist_uncorrelated_equality_subqueries off optimizer_use_forecasts on optimizer_use_histograms on optimizer_use_improved_disjunction_stats off diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index d083fddd5b51..cb182cad664b 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -162,6 +162,7 @@ type Memo struct { useLimitOrderingForStreamingGroupBy bool useImprovedSplitDisjunctionForJoins bool alwaysUseHistograms bool + hoistUncorrelatedEqualitySubqueries bool // curRank is the highest currently in-use scalar expression rank. curRank opt.ScalarRank @@ -219,6 +220,7 @@ func (m *Memo) Init(evalCtx *eval.Context) { useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy, useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins, alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms, + hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries, } m.metadata.Init() m.logPropsBuilder.init(evalCtx, m) @@ -359,7 +361,8 @@ func (m *Memo) IsStale( m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats || m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy || m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins || - m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms { + m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms || + m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index c5e38a84b614..dab142eff126 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -340,6 +340,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerAlwaysUseHistograms = false notStale() + // Stale optimizer_hoist_uncorrelated_equality_subqueries. + evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = true + stale() + evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = false + notStale() + // Stale data sources and schema. Create new catalog so that data sources are // recreated and can be modified independently. catalog = testcat.New() diff --git a/pkg/sql/opt/norm/decorrelate_funcs.go b/pkg/sql/opt/norm/decorrelate_funcs.go index 96b7308e0176..12abf35809f0 100644 --- a/pkg/sql/opt/norm/decorrelate_funcs.go +++ b/pkg/sql/opt/norm/decorrelate_funcs.go @@ -66,6 +66,20 @@ func (c *CustomFuncs) deriveHasHoistableSubquery(scalar opt.ScalarExpr) bool { case *memo.UDFExpr: // Do not attempt to hoist UDFs. return false + + case *memo.EqExpr: + // Hoist subqueries in expressions like (Eq (Variable) (Subquery)) if + // the corresponding session setting is enabled. + // TODO(mgartner): We could hoist if we have an IS NOT DISTINCT FROM + // expression. But it won't currently lead to a lookup join due to + // #100855 and the plan could be worse, so we avoid it for now. + if c.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries { + _, isLeftVar := scalar.Child(0).(*memo.VariableExpr) + _, isRightSubquery := scalar.Child(1).(*memo.SubqueryExpr) + if isLeftVar && isRightSubquery { + return true + } + } } // If HasHoistableSubquery is true for any child, then it's true for this @@ -796,7 +810,14 @@ func (r *subqueryHoister) hoistAll(scalar opt.ScalarExpr) opt.ScalarExpr { switch scalar.Op() { case opt.SubqueryOp, opt.ExistsOp, opt.AnyOp, opt.ArrayFlattenOp: subquery := scalar.Child(0).(memo.RelExpr) - if subquery.Relational().OuterCols.Empty() { + // According to the implementation of deriveHasHoistableSubquery, + // Exists, Any, and ArrayFlatten expressions are only hoistable if they + // are correlated. Uncorrelated subquery expressions are hoistable if + // the corresponding session setting is enabled and they are part of an + // equality expression with a variable. + uncorrelatedHoistAllowed := scalar.Op() == opt.SubqueryOp && + r.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries + if subquery.Relational().OuterCols.Empty() && !uncorrelatedHoistAllowed { break } diff --git a/pkg/sql/opt/norm/testdata/rules/decorrelate b/pkg/sql/opt/norm/testdata/rules/decorrelate index 683fee0dc8de..da98feaa7e1b 100644 --- a/pkg/sql/opt/norm/testdata/rules/decorrelate +++ b/pkg/sql/opt/norm/testdata/rules/decorrelate @@ -4585,6 +4585,128 @@ project │ └── 1 └── true_agg:17 IS NOT NULL +# Hoist an uncorrelated equality subquery. +norm expect=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT * FROM a WHERE k = (SELECT max(x) FROM xy) +---- +project + ├── columns: k:1!null i:2 f:3 s:4 j:5 + ├── cardinality: [0 - 1] + ├── key: () + ├── fd: ()-->(1-5) + └── inner-join (hash) + ├── columns: k:1!null i:2 f:3 s:4 j:5 max:12!null + ├── cardinality: [0 - 1] + ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one) + ├── key: () + ├── fd: ()-->(1-5,12), (12)==(1), (1)==(12) + ├── scan a + │ ├── columns: k:1!null i:2 f:3 s:4 j:5 + │ ├── key: (1) + │ └── fd: (1)-->(2-5) + ├── scalar-group-by + │ ├── columns: max:12 + │ ├── cardinality: [1 - 1] + │ ├── key: () + │ ├── fd: ()-->(12) + │ ├── scan xy + │ │ ├── columns: x:8!null + │ │ └── key: (8) + │ └── aggregations + │ └── max [as=max:12, outer=(8)] + │ └── x:8 + └── filters + └── k:1 = max:12 [outer=(1,12), constraints=(/1: (/NULL - ]; /12: (/NULL - ]), fd=(1)==(12), (12)==(1)] + +# Hoist an uncorrelated equality subquery that could return multiple rows. +norm expect=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT * FROM a WHERE k = (SELECT x FROM xy) +---- +project + ├── columns: k:1!null i:2 f:3 s:4 j:5 + ├── cardinality: [0 - 1] + ├── key: () + ├── fd: ()-->(1-5) + └── inner-join (hash) + ├── columns: k:1!null i:2 f:3 s:4 j:5 x:8!null + ├── cardinality: [0 - 1] + ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one) + ├── key: () + ├── fd: ()-->(1-5,8), (8)==(1), (1)==(8) + ├── scan a + │ ├── columns: k:1!null i:2 f:3 s:4 j:5 + │ ├── key: (1) + │ └── fd: (1)-->(2-5) + ├── max1-row + │ ├── columns: x:8!null + │ ├── error: "more than one row returned by a subquery used as an expression" + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ ├── fd: ()-->(8) + │ └── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── filters + └── k:1 = x:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)] + +# Do not hoist an uncorrelated equality subquery if the corresponding session +# setting is disabled. +norm expect-not=HoistSelectSubquery +SELECT * FROM a WHERE k = (SELECT max(x) FROM xy) +---- +select + ├── columns: k:1!null i:2 f:3 s:4 j:5 + ├── key: (1) + ├── fd: (1)-->(2-5) + ├── scan a + │ ├── columns: k:1!null i:2 f:3 s:4 j:5 + │ ├── key: (1) + │ └── fd: (1)-->(2-5) + └── filters + └── eq [outer=(1), subquery, constraints=(/1: (/NULL - ])] + ├── k:1 + └── subquery + └── scalar-group-by + ├── columns: max:12 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(12) + ├── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── aggregations + └── max [as=max:12, outer=(8)] + └── x:8 + +# Do not hoist an uncorrelated inequality subquery. We have not yet proven that +# it will lead to a better plan. +norm expect-not=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT * FROM a WHERE k < (SELECT max(x) FROM xy) +---- +select + ├── columns: k:1!null i:2 f:3 s:4 j:5 + ├── key: (1) + ├── fd: (1)-->(2-5) + ├── scan a + │ ├── columns: k:1!null i:2 f:3 s:4 j:5 + │ ├── key: (1) + │ └── fd: (1)-->(2-5) + └── filters + └── lt [outer=(1), subquery, constraints=(/1: (/NULL - ])] + ├── k:1 + └── subquery + └── scalar-group-by + ├── columns: max:12 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(12) + ├── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── aggregations + └── max [as=max:12, outer=(8)] + └── x:8 + # -------------------------------------------------- # HoistProjectSubquery # -------------------------------------------------- @@ -4896,19 +5018,114 @@ values │ └── y:9 = i:2 [outer=(2,9), constraints=(/2: (/NULL - ]; /9: (/NULL - ]), fd=(2)==(9), (9)==(2)] └── 1 -# Don't hoist uncorrelated subquery. -norm -SELECT i < ANY(SELECT y FROM xy) AS r FROM a +# Hoist an uncorrelated equality subquery. +norm expect=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT k = (SELECT max(x) FROM xy) FROM a ---- project - ├── columns: r:12 + ├── columns: "?column?":13 + ├── inner-join (cross) + │ ├── columns: k:1!null max:12 + │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + │ ├── key: (1) + │ ├── fd: ()-->(12) + │ ├── scan a + │ │ ├── columns: k:1!null + │ │ └── key: (1) + │ ├── scalar-group-by + │ │ ├── columns: max:12 + │ │ ├── cardinality: [1 - 1] + │ │ ├── key: () + │ │ ├── fd: ()-->(12) + │ │ ├── scan xy + │ │ │ ├── columns: x:8!null + │ │ │ └── key: (8) + │ │ └── aggregations + │ │ └── max [as=max:12, outer=(8)] + │ │ └── x:8 + │ └── filters (true) + └── projections + └── k:1 = max:12 [as="?column?":13, outer=(1,12)] + +# Hoist an uncorrelated equality subquery that could return multiple rows. +norm expect=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT k = (SELECT x FROM xy) FROM a +---- +project + ├── columns: "?column?":12 + ├── left-join (cross) + │ ├── columns: k:1!null x:8 + │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + │ ├── key: (1) + │ ├── fd: (1)-->(8) + │ ├── scan a + │ │ ├── columns: k:1!null + │ │ └── key: (1) + │ ├── max1-row + │ │ ├── columns: x:8!null + │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ ├── cardinality: [0 - 1] + │ │ ├── key: () + │ │ ├── fd: ()-->(8) + │ │ └── scan xy + │ │ ├── columns: x:8!null + │ │ └── key: (8) + │ └── filters (true) + └── projections + └── k:1 = x:8 [as="?column?":12, outer=(1,8)] + +# Do not hoist an uncorrelated equality subquery if the corresponding session +# setting is disabled. +norm expect-not=HoistProjectSubquery +SELECT k = (SELECT max(x) FROM xy) FROM a +---- +project + ├── columns: "?column?":13 ├── scan a - │ └── columns: i:2 + │ ├── columns: k:1!null + │ └── key: (1) + └── projections + └── eq [as="?column?":13, outer=(1), subquery] + ├── k:1 + └── subquery + └── scalar-group-by + ├── columns: max:12 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(12) + ├── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── aggregations + └── max [as=max:12, outer=(8)] + └── x:8 + +# Do not hoist an uncorrelated inequality subquery. We have not yet proven that +# it will lead to a better plan. +norm expect-not=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT k < (SELECT max(x) FROM xy) FROM a +---- +project + ├── columns: "?column?":13 + ├── scan a + │ ├── columns: k:1!null + │ └── key: (1) └── projections - └── any: lt [as=r:12, outer=(2), correlated-subquery] - ├── scan xy - │ └── columns: y:9 - └── i:2 + └── lt [as="?column?":13, outer=(1), subquery] + ├── k:1 + └── subquery + └── scalar-group-by + ├── columns: max:12 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(12) + ├── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── aggregations + └── max [as=max:12, outer=(8)] + └── x:8 + # -------------------------------------------------- # HoistJoinSubquery diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index cda430ce6c8d..aa91f2fbaa00 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -334,6 +334,11 @@ message LocalOnlySessionData { // parallelization will still be disabled for queries with LIMITs, and it can // lead to increased likelihood of OOMs. bool unbounded_parallel_scans = 101; + // OptimizerHoistUncorrelatedEqualitySubqueries, when true, causes the + // optimizer to hoist uncorrelated subqueries that are part of an equality + // expression with a column reference, which can produce more efficient query + // plans. + bool optimizer_hoist_uncorrelated_equality_subqueries = 102; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index 5da29daa43b0..b58215cf6765 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2417,6 +2417,23 @@ var varGen = map[string]sessionVar{ GlobalDefault: globalFalse, }, + // CockroachDB extension. + `optimizer_hoist_uncorrelated_equality_subqueries`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_hoist_uncorrelated_equality_subqueries`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_hoist_uncorrelated_equality_subqueries", s) + if err != nil { + return err + } + m.SetOptimizerHoistUncorrelatedEqualitySubqueries(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries), nil + }, + GlobalDefault: globalFalse, + }, + // CockroachDB extension. `prepared_statements_cache_size`: { Set: func(_ context.Context, m sessionDataMutator, s string) error {