diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 5e3eb45e77bc..31b30d569f59 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3513,6 +3513,10 @@ func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) { m.data.OptimizerAlwaysUseHistograms = val } +func (m *sessionDataMutator) SetOptimizerHoistUncorrelatedEqualitySubqueries(val bool) { + m.data.OptimizerHoistUncorrelatedEqualitySubqueries = val +} + func (m *sessionDataMutator) SetEnableCreateStatsUsingExtremes(val bool) { m.data.EnableCreateStatsUsingExtremes = val } diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index 47d4154fc759..3c98218e2d85 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -5272,6 +5272,7 @@ on_update_rehome_row_enabled on opt_split_scan_limit 2048 optimizer on optimizer_always_use_histograms on +optimizer_hoist_uncorrelated_equality_subqueries off optimizer_use_forecasts on optimizer_use_histograms on optimizer_use_improved_disjunction_stats on diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 776e50b2b397..6cee4d32a8a2 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -2759,6 +2759,7 @@ null_ordered_last off NULL on_update_rehome_row_enabled on NULL NULL NULL string opt_split_scan_limit 2048 NULL NULL NULL string optimizer_always_use_histograms on NULL NULL NULL string +optimizer_hoist_uncorrelated_equality_subqueries off NULL NULL NULL string optimizer_use_forecasts on NULL NULL NULL string optimizer_use_histograms on NULL NULL NULL string optimizer_use_improved_disjunction_stats on NULL NULL NULL string @@ -2912,6 +2913,7 @@ null_ordered_last off NULL on_update_rehome_row_enabled on NULL user NULL on on opt_split_scan_limit 2048 NULL user NULL 
2048 2048 optimizer_always_use_histograms on NULL user NULL on on +optimizer_hoist_uncorrelated_equality_subqueries off NULL user NULL off off optimizer_use_forecasts on NULL user NULL on on optimizer_use_histograms on NULL user NULL on on optimizer_use_improved_disjunction_stats on NULL user NULL on on @@ -3065,6 +3067,7 @@ on_update_rehome_row_enabled NULL NULL NULL opt_split_scan_limit NULL NULL NULL NULL NULL optimizer NULL NULL NULL NULL NULL optimizer_always_use_histograms NULL NULL NULL NULL NULL +optimizer_hoist_uncorrelated_equality_subqueries NULL NULL NULL NULL NULL optimizer_use_forecasts NULL NULL NULL NULL NULL optimizer_use_histograms NULL NULL NULL NULL NULL optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 60f610cd5e99..ca71a9d14434 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -113,6 +113,7 @@ null_ordered_last off on_update_rehome_row_enabled on opt_split_scan_limit 2048 optimizer_always_use_histograms on +optimizer_hoist_uncorrelated_equality_subqueries off optimizer_use_forecasts on optimizer_use_histograms on optimizer_use_improved_disjunction_stats on diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index b592288b8e19..427fb5f6bc6e 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -163,6 +163,7 @@ type Memo struct { useLimitOrderingForStreamingGroupBy bool useImprovedSplitDisjunctionForJoins bool alwaysUseHistograms bool + hoistUncorrelatedEqualitySubqueries bool // curRank is the highest currently in-use scalar expression rank. 
curRank opt.ScalarRank @@ -221,6 +222,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) { useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy, useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins, alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms, + hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries, } m.metadata.Init() m.logPropsBuilder.init(ctx, evalCtx, m) @@ -362,7 +364,8 @@ func (m *Memo) IsStale( m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats || m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy || m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins || - m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms { + m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms || + m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index 22497c47dae3..430c19bad99f 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -354,6 +354,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerAlwaysUseHistograms = false notStale() + // Stale optimizer_hoist_uncorrelated_equality_subqueries. + evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = true + stale() + evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = false + notStale() + // Stale data sources and schema. Create new catalog so that data sources are // recreated and can be modified independently. 
catalog = testcat.New() diff --git a/pkg/sql/opt/norm/decorrelate_funcs.go b/pkg/sql/opt/norm/decorrelate_funcs.go index 62547760a599..5181dab98bcb 100644 --- a/pkg/sql/opt/norm/decorrelate_funcs.go +++ b/pkg/sql/opt/norm/decorrelate_funcs.go @@ -66,6 +66,20 @@ func (c *CustomFuncs) deriveHasHoistableSubquery(scalar opt.ScalarExpr) bool { case *memo.UDFExpr: // Do not attempt to hoist UDFs. return false + + case *memo.EqExpr: + // Hoist subqueries in expressions like (Eq (Variable) (Subquery)) if + // the corresponding session setting is enabled. + // TODO(mgartner): We could hoist if we have an IS NOT DISTINCT FROM + // expression. But it won't currently lead to a lookup join due to + // #100855 and the plan could be worse, so we avoid it for now. + if c.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries { + _, isLeftVar := scalar.Child(0).(*memo.VariableExpr) + _, isRightSubquery := scalar.Child(1).(*memo.SubqueryExpr) + if isLeftVar && isRightSubquery { + return true + } + } } // If HasHoistableSubquery is true for any child, then it's true for this @@ -807,7 +821,14 @@ func (r *subqueryHoister) hoistAll(scalar opt.ScalarExpr) opt.ScalarExpr { switch scalar.Op() { case opt.SubqueryOp, opt.ExistsOp, opt.AnyOp, opt.ArrayFlattenOp: subquery := scalar.Child(0).(memo.RelExpr) - if subquery.Relational().OuterCols.Empty() { + // According to the implementation of deriveHasHoistableSubquery, + // Exists, Any, and ArrayFlatten expressions are only hoistable if they + // are correlated. Uncorrelated subquery expressions are hoistable if + // the corresponding session setting is enabled and they are part of an + // equality expression with a variable. 
+ uncorrelatedHoistAllowed := scalar.Op() == opt.SubqueryOp && + r.f.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries + if subquery.Relational().OuterCols.Empty() && !uncorrelatedHoistAllowed { break } diff --git a/pkg/sql/opt/norm/testdata/rules/decorrelate b/pkg/sql/opt/norm/testdata/rules/decorrelate index 55cc60523511..65d47352d55e 100644 --- a/pkg/sql/opt/norm/testdata/rules/decorrelate +++ b/pkg/sql/opt/norm/testdata/rules/decorrelate @@ -4693,27 +4693,135 @@ project │ │ └── f:3 │ ├── const-agg [as=s:4, outer=(4)] │ │ └── s:4 - │ └── const-agg [as=j:5, outer=(5)] - │ └── j:5 + │ ├── const-agg [as=j:5, outer=(5)] + │ │ └── j:5 + │ └── const-agg [as=column16:16, outer=(16)] + │ └── column16:16 + └── filters + └── COALESCE(column16:16, false) OR (true_agg:18 IS NOT NULL) [outer=(16,18)] + +# Hoist an uncorrelated equality subquery. +norm expect=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT * FROM a WHERE k = (SELECT max(x) FROM xy) +---- +project + ├── columns: k:1!null i:2 f:3 s:4 j:5 + ├── cardinality: [0 - 1] + ├── key: () + ├── fd: ()-->(1-5) + └── inner-join (hash) + ├── columns: k:1!null i:2 f:3 s:4 j:5 max:12!null + ├── cardinality: [0 - 1] + ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one) + ├── key: () + ├── fd: ()-->(1-5,12), (12)==(1), (1)==(12) + ├── scan a + │ ├── columns: k:1!null i:2 f:3 s:4 j:5 + │ ├── key: (1) + │ └── fd: (1)-->(2-5) + ├── scalar-group-by + │ ├── columns: max:12 + │ ├── cardinality: [1 - 1] + │ ├── key: () + │ ├── fd: ()-->(12) + │ ├── scan xy + │ │ ├── columns: x:8!null + │ │ └── key: (8) + │ └── aggregations + │ └── max [as=max:12, outer=(8)] + │ └── x:8 + └── filters + └── k:1 = max:12 [outer=(1,12), constraints=(/1: (/NULL - ]; /12: (/NULL - ]), fd=(1)==(12), (12)==(1)] + +# Hoist an uncorrelated equality subquery that could return multiple rows. 
+norm expect=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT * FROM a WHERE k = (SELECT x FROM xy) +---- +project + ├── columns: k:1!null i:2 f:3 s:4 j:5 + ├── cardinality: [0 - 1] + ├── key: () + ├── fd: ()-->(1-5) + └── inner-join (hash) + ├── columns: k:1!null i:2 f:3 s:4 j:5 x:8!null + ├── cardinality: [0 - 1] + ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one) + ├── key: () + ├── fd: ()-->(1-5,8), (8)==(1), (1)==(8) + ├── scan a + │ ├── columns: k:1!null i:2 f:3 s:4 j:5 + │ ├── key: (1) + │ └── fd: (1)-->(2-5) + ├── max1-row + │ ├── columns: x:8!null + │ ├── error: "more than one row returned by a subquery used as an expression" + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ ├── fd: ()-->(8) + │ └── scan xy + │ ├── columns: x:8!null + │ └── key: (8) └── filters - └── or [outer=(18), subquery] - ├── coalesce - │ ├── subquery - │ │ └── project - │ │ ├── columns: column16:16!null - │ │ ├── cardinality: [0 - 1] - │ │ ├── key: () - │ │ ├── fd: ()-->(16) - │ │ ├── limit - │ │ │ ├── cardinality: [0 - 1] - │ │ │ ├── key: () - │ │ │ ├── scan xy - │ │ │ │ └── limit hint: 1.00 - │ │ │ └── 1 - │ │ └── projections - │ │ └── true [as=column16:16] - │ └── false - └── true_agg:18 IS NOT NULL + └── k:1 = x:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)] + +# Do not hoist an uncorrelated equality subquery if the corresponding session +# setting is disabled. 
+norm expect-not=HoistSelectSubquery +SELECT * FROM a WHERE k = (SELECT max(x) FROM xy) +---- +select + ├── columns: k:1!null i:2 f:3 s:4 j:5 + ├── key: (1) + ├── fd: (1)-->(2-5) + ├── scan a + │ ├── columns: k:1!null i:2 f:3 s:4 j:5 + │ ├── key: (1) + │ └── fd: (1)-->(2-5) + └── filters + └── eq [outer=(1), subquery, constraints=(/1: (/NULL - ])] + ├── k:1 + └── subquery + └── scalar-group-by + ├── columns: max:12 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(12) + ├── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── aggregations + └── max [as=max:12, outer=(8)] + └── x:8 + +# Do not hoist an uncorrelated inequality subquery. We have not yet proven that +# it will lead to a better plan. +norm expect-not=HoistSelectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT * FROM a WHERE k < (SELECT max(x) FROM xy) +---- +select + ├── columns: k:1!null i:2 f:3 s:4 j:5 + ├── key: (1) + ├── fd: (1)-->(2-5) + ├── scan a + │ ├── columns: k:1!null i:2 f:3 s:4 j:5 + │ ├── key: (1) + │ └── fd: (1)-->(2-5) + └── filters + └── lt [outer=(1), subquery, constraints=(/1: (/NULL - ])] + ├── k:1 + └── subquery + └── scalar-group-by + ├── columns: max:12 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(12) + ├── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── aggregations + └── max [as=max:12, outer=(8)] + └── x:8 + # -------------------------------------------------- # HoistProjectSubquery @@ -5119,19 +5227,114 @@ values │ └── true [as=column16:16] └── false -# Don't hoist uncorrelated subquery. -norm -SELECT i < ANY(SELECT y FROM xy) AS r FROM a +# Hoist an uncorrelated equality subquery. 
+norm expect=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT k = (SELECT max(x) FROM xy) FROM a ---- project - ├── columns: r:12 + ├── columns: "?column?":13 + ├── inner-join (cross) + │ ├── columns: k:1!null max:12 + │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + │ ├── key: (1) + │ ├── fd: ()-->(12) + │ ├── scan a + │ │ ├── columns: k:1!null + │ │ └── key: (1) + │ ├── scalar-group-by + │ │ ├── columns: max:12 + │ │ ├── cardinality: [1 - 1] + │ │ ├── key: () + │ │ ├── fd: ()-->(12) + │ │ ├── scan xy + │ │ │ ├── columns: x:8!null + │ │ │ └── key: (8) + │ │ └── aggregations + │ │ └── max [as=max:12, outer=(8)] + │ │ └── x:8 + │ └── filters (true) + └── projections + └── k:1 = max:12 [as="?column?":13, outer=(1,12)] + +# Hoist an uncorrelated equality subquery that could return multiple rows. +norm expect=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT k = (SELECT x FROM xy) FROM a +---- +project + ├── columns: "?column?":12 + ├── left-join (cross) + │ ├── columns: k:1!null x:8 + │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + │ ├── key: (1) + │ ├── fd: (1)-->(8) + │ ├── scan a + │ │ ├── columns: k:1!null + │ │ └── key: (1) + │ ├── max1-row + │ │ ├── columns: x:8!null + │ │ ├── error: "more than one row returned by a subquery used as an expression" + │ │ ├── cardinality: [0 - 1] + │ │ ├── key: () + │ │ ├── fd: ()-->(8) + │ │ └── scan xy + │ │ ├── columns: x:8!null + │ │ └── key: (8) + │ └── filters (true) + └── projections + └── k:1 = x:8 [as="?column?":12, outer=(1,8)] + +# Do not hoist an uncorrelated equality subquery if the corresponding session +# setting is disabled. 
+norm expect-not=HoistProjectSubquery +SELECT k = (SELECT max(x) FROM xy) FROM a +---- +project + ├── columns: "?column?":13 ├── scan a - │ └── columns: i:2 + │ ├── columns: k:1!null + │ └── key: (1) + └── projections + └── eq [as="?column?":13, outer=(1), subquery] + ├── k:1 + └── subquery + └── scalar-group-by + ├── columns: max:12 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(12) + ├── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── aggregations + └── max [as=max:12, outer=(8)] + └── x:8 + +# Do not hoist an uncorrelated inequality subquery. We have not yet proven that +# it will lead to a better plan. +norm expect-not=HoistProjectSubquery set=optimizer_hoist_uncorrelated_equality_subqueries=on +SELECT k < (SELECT max(x) FROM xy) FROM a +---- +project + ├── columns: "?column?":13 + ├── scan a + │ ├── columns: k:1!null + │ └── key: (1) └── projections - └── any: lt [as=r:12, outer=(2), correlated-subquery] - ├── scan xy - │ └── columns: y:9 - └── i:2 + └── lt [as="?column?":13, outer=(1), subquery] + ├── k:1 + └── subquery + └── scalar-group-by + ├── columns: max:12 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(12) + ├── scan xy + │ ├── columns: x:8!null + │ └── key: (8) + └── aggregations + └── max [as=max:12, outer=(8)] + └── x:8 + # -------------------------------------------------- # HoistJoinSubquery diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index 19914953017a..faf85496de16 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -374,6 +374,11 @@ message LocalOnlySessionData { // query without sub/post queries can be executed in interleaving manner, but // with a local execution plan. 
bool multiple_active_portals_enabled = 100; + // OptimizerHoistUncorrelatedEqualitySubqueries, when true, causes the + // optimizer to hoist uncorrelated subqueries that are part of an equality + // expression with a column reference, which can produce more efficient query + // plans. + bool optimizer_hoist_uncorrelated_equality_subqueries = 102; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index 0651da4413ec..212c3394b0c5 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2568,6 +2568,23 @@ var varGen = map[string]sessionVar{ GlobalDefault: globalTrue, }, + // CockroachDB extension. + `optimizer_hoist_uncorrelated_equality_subqueries`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_hoist_uncorrelated_equality_subqueries`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_hoist_uncorrelated_equality_subqueries", s) + if err != nil { + return err + } + m.SetOptimizerHoistUncorrelatedEqualitySubqueries(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries), nil + }, + GlobalDefault: globalFalse, + }, + // CockroachDB extension. `enable_create_stats_using_extremes`: { GetStringVal: makePostgresBoolGetStringValFn(`enable_create_stats_using_extremes`),