diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index b28d9bfe6c1d..07c273b33706 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3608,6 +3608,10 @@ func (m *sessionDataMutator) SetReplicationMode(val sessiondatapb.ReplicationMod m.data.ReplicationMode = val } +func (m *sessionDataMutator) SetOptimizerUseImprovedJoinElimination(val bool) { + m.data.OptimizerUseImprovedJoinElimination = val +} + // Utility functions related to scrubbing sensitive information on SQL Stats. // quantizeCounts ensures that the Count field in the diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index 80a76e257ae9..80bd5413f8c9 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -5350,6 +5350,7 @@ optimizer_use_forecasts on optimizer_use_histograms on optimizer_use_improved_computed_column_filters_derivation on optimizer_use_improved_disjunction_stats on +optimizer_use_improved_join_elimination on optimizer_use_improved_split_disjunction_for_joins on optimizer_use_limit_ordering_for_streaming_group_by on optimizer_use_multicol_stats on diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index b71f725f45fd..e343ce238b82 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -2785,6 +2785,7 @@ optimizer_use_forecasts on N optimizer_use_histograms on NULL NULL NULL string optimizer_use_improved_computed_column_filters_derivation on NULL NULL NULL string optimizer_use_improved_disjunction_stats on NULL NULL NULL string +optimizer_use_improved_join_elimination on NULL NULL NULL string optimizer_use_improved_split_disjunction_for_joins on NULL NULL NULL string optimizer_use_limit_ordering_for_streaming_group_by on NULL NULL NULL string optimizer_use_multicol_stats on NULL NULL NULL 
string @@ -2944,6 +2945,7 @@ optimizer_use_forecasts on N optimizer_use_histograms on NULL user NULL on on optimizer_use_improved_computed_column_filters_derivation on NULL user NULL on on optimizer_use_improved_disjunction_stats on NULL user NULL on on +optimizer_use_improved_join_elimination on NULL user NULL on on optimizer_use_improved_split_disjunction_for_joins on NULL user NULL on on optimizer_use_limit_ordering_for_streaming_group_by on NULL user NULL on on optimizer_use_multicol_stats on NULL user NULL on on @@ -3102,6 +3104,7 @@ optimizer_use_forecasts NULL NULL NULL optimizer_use_histograms NULL NULL NULL NULL NULL optimizer_use_improved_computed_column_filters_derivation NULL NULL NULL NULL NULL optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL +optimizer_use_improved_join_elimination NULL NULL NULL NULL NULL optimizer_use_improved_split_disjunction_for_joins NULL NULL NULL NULL NULL optimizer_use_limit_ordering_for_streaming_group_by NULL NULL NULL NULL NULL optimizer_use_multicol_stats NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 9b48ef44d2fa..65d5af2a1f41 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -120,6 +120,7 @@ optimizer_use_forecasts on optimizer_use_histograms on optimizer_use_improved_computed_column_filters_derivation on optimizer_use_improved_disjunction_stats on +optimizer_use_improved_join_elimination on optimizer_use_improved_split_disjunction_for_joins on optimizer_use_limit_ordering_for_streaming_group_by on optimizer_use_multicol_stats on diff --git a/pkg/sql/opt/memo/logical_props_builder.go b/pkg/sql/opt/memo/logical_props_builder.go index c050e0eb2802..cc9bc90b6480 100644 --- a/pkg/sql/opt/memo/logical_props_builder.go +++ b/pkg/sql/opt/memo/logical_props_builder.go @@ -2258,6 +2258,7 @@ func addOuterColsToFuncDep(outerCols opt.ColSet, fdset 
*props.FuncDepSet) { // joins that are used internally when deriving logical properties and // statistics. type joinPropsHelper struct { + evalCtx *eval.Context join RelExpr joinType opt.Operator @@ -2276,7 +2277,7 @@ type joinPropsHelper struct { func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) { // This initialization pattern ensures that fields are not unwittingly // reused. Field reuse must be explicit. - *h = joinPropsHelper{join: joinExpr} + *h = joinPropsHelper{evalCtx: b.evalCtx, join: joinExpr} switch join := joinExpr.(type) { case *LookupJoinExpr: @@ -2514,7 +2515,9 @@ func (h *joinPropsHelper) setFuncDeps(rel *props.Relational) { // created new possibilities for simplifying removed columns. rel.FuncDeps.ProjectCols(rel.OutputCols) } - h.addSelfJoinImpliedFDs(rel) + if h.evalCtx.SessionData().OptimizerUseImprovedJoinElimination { + h.addSelfJoinImpliedFDs(rel) + } } // addSelfJoinImpliedFDs adds any extra equality FDs that are implied by a self diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index 9cd060dae529..8a9de11fc073 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -167,6 +167,7 @@ type Memo struct { alwaysUseHistograms bool hoistUncorrelatedEqualitySubqueries bool useImprovedComputedColumnFiltersDerivation bool + useImprovedJoinElimination bool // curRank is the highest currently in-use scalar expression rank. 
curRank opt.ScalarRank @@ -228,6 +229,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) { alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms, hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries, useImprovedComputedColumnFiltersDerivation: evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation, + useImprovedJoinElimination: evalCtx.SessionData().OptimizerUseImprovedJoinElimination, } m.metadata.Init() m.logPropsBuilder.init(ctx, evalCtx, m) @@ -372,7 +374,8 @@ func (m *Memo) IsStale( m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins || m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms || m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries || - m.useImprovedComputedColumnFiltersDerivation != evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation { + m.useImprovedComputedColumnFiltersDerivation != evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation || + m.useImprovedJoinElimination != evalCtx.SessionData().OptimizerUseImprovedJoinElimination { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index 61ea58ca5b41..a485259ee2f0 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -373,6 +373,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation = false notStale() + // Stale optimizer_use_improved_join_elimination. + evalCtx.SessionData().OptimizerUseImprovedJoinElimination = true + stale() + evalCtx.SessionData().OptimizerUseImprovedJoinElimination = false + notStale() + // User no longer has access to view. 
catalog.View(tree.NewTableNameWithSchema("t", catconstants.PublicSchemaName, "abcview")).Revoked = true _, err = o.Memo().IsStale(ctx, &evalCtx, catalog) diff --git a/pkg/sql/opt/norm/project_funcs.go b/pkg/sql/opt/norm/project_funcs.go index e964af80395a..d7f5346d6b4b 100644 --- a/pkg/sql/opt/norm/project_funcs.go +++ b/pkg/sql/opt/norm/project_funcs.go @@ -908,3 +908,10 @@ func (c *CustomFuncs) RemapProjectionCols( } return *(replace(&projections).(*memo.ProjectionsExpr)) } + +// CanUseImprovedJoinElimination returns true if either no column remapping is +// required in order to eliminate the join, or column remapping is enabled by +// OptimizerUseImprovedJoinElimination. +func (c *CustomFuncs) CanUseImprovedJoinElimination(from, to opt.ColSet) bool { + return c.f.evalCtx.SessionData().OptimizerUseImprovedJoinElimination || from.SubsetOf(to) +} diff --git a/pkg/sql/opt/norm/rules/groupby.opt b/pkg/sql/opt/norm/rules/groupby.opt index 2bdd2cddf9fe..41669433afff 100644 --- a/pkg/sql/opt/norm/rules/groupby.opt +++ b/pkg/sql/opt/norm/rules/groupby.opt @@ -79,6 +79,7 @@ $leftCols $fds:(FuncDeps $input) ) & + (CanUseImprovedJoinElimination $toRemap $leftCols) & (CanEliminateJoinUnderGroupByLeft $input $aggs) ) => @@ -114,6 +115,7 @@ $rightCols $fds:(FuncDeps $input) ) & + (CanUseImprovedJoinElimination $toRemap $rightCols) & (CanEliminateJoinUnderGroupByRight $input $aggs) ) => diff --git a/pkg/sql/opt/norm/rules/project.opt b/pkg/sql/opt/norm/rules/project.opt index 08861772f550..252eded2ad7a 100644 --- a/pkg/sql/opt/norm/rules/project.opt +++ b/pkg/sql/opt/norm/rules/project.opt @@ -27,13 +27,14 @@ $projections:* $passthrough:* & (CanRemapCols - (UnionCols + $fromCols:(UnionCols $passthrough (ProjectionOuterCols $projections) ) $leftCols:(OutputCols $left) $fds:(FuncDeps $join) - ) + ) & + (CanUseImprovedJoinElimination $fromCols $leftCols) ) => (Project @@ -56,13 +57,14 @@ $projections:* $passthrough:* & (CanRemapCols - (UnionCols + $fromCols:(UnionCols $passthrough 
(ProjectionOuterCols $projections) ) $rightCols:(OutputCols $right) $fds:(FuncDeps $join) - ) + ) & + (CanUseImprovedJoinElimination $fromCols $rightCols) ) => (Project diff --git a/pkg/sql/opt/norm/testdata/rules/project b/pkg/sql/opt/norm/testdata/rules/project index 911638848adc..65e302bc5419 100644 --- a/pkg/sql/opt/norm/testdata/rules/project +++ b/pkg/sql/opt/norm/testdata/rules/project @@ -266,6 +266,40 @@ project └── projections └── child.parent_id:2 [as=parent_id:9, outer=(2)] +# With OptimizerUseImprovedJoinElimination disabled, the join can still be +# eliminated as long as no column remapping is required. +norm set=optimizer_use_improved_join_elimination=false expect=EliminateJoinUnderProjectLeft +SELECT b.x, b.z FROM b LEFT JOIN a ON b.x = a.x +---- +scan b + ├── columns: x:1!null z:2 + ├── key: (1) + └── fd: (1)-->(2) + +# The join cannot be eliminated with OptimizerUseImprovedJoinElimination +# disabled because doing so requires remapping b1.x to b.x. +norm set=optimizer_use_improved_join_elimination=false expect-not=EliminateJoinUnderProjectLeft +SELECT b.j, b1.x FROM b INNER JOIN b AS b1 ON b.x = b1.x +---- +project + ├── columns: j:3 x:6!null + ├── key: (6) + ├── fd: (6)-->(3) + └── inner-join (hash) + ├── columns: b.x:1!null b.j:3 b1.x:6!null + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) + ├── key: (6) + ├── fd: (1)-->(3), (1)==(6), (6)==(1) + ├── scan b + │ ├── columns: b.x:1!null b.j:3 + │ ├── key: (1) + │ └── fd: (1)-->(3) + ├── scan b [as=b1] + │ ├── columns: b1.x:6!null + │ └── key: (6) + └── filters + └── b.x:1 = b1.x:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + # -------------------------------------------------- # EliminateJoinUnderProjectRight # -------------------------------------------------- diff --git a/pkg/sql/opt/testutils/opttester/opt_tester.go b/pkg/sql/opt/testutils/opttester/opt_tester.go index 24bc6a5d93bf..77554d7e0de5 100644 --- a/pkg/sql/opt/testutils/opttester/opt_tester.go +++
b/pkg/sql/opt/testutils/opttester/opt_tester.go @@ -301,6 +301,7 @@ func New(catalog cat.Catalog, sql string) *OptTester { ot.evalCtx.SessionData().OptimizerAlwaysUseHistograms = true ot.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = true ot.evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation = true + ot.evalCtx.SessionData().OptimizerUseImprovedJoinElimination = true return ot } diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index ba35b92714ea..8a44717a6fb9 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -405,6 +405,10 @@ message LocalOnlySessionData { // ReplicationMode represents the replication parameter passed in during // connection time. ReplicationMode replication_mode = 106; + // OptimizerUseImprovedJoinElimination, when true, allows the optimizer to + // eliminate joins in more cases by remapping columns from the eliminated + // input of the join to equivalent columns from the preserved input. + bool optimizer_use_improved_join_elimination = 107; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index 4ae64e8ae58f..fd00afe1c7a5 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2800,6 +2800,23 @@ var varGen = map[string]sessionVar{ return strconv.FormatInt(maxConn, 10), nil }, }, + + // CockroachDB extension. 
+ `optimizer_use_improved_join_elimination`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_use_improved_join_elimination`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_use_improved_join_elimination", s) + if err != nil { + return err + } + m.SetOptimizerUseImprovedJoinElimination(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerUseImprovedJoinElimination), nil + }, + GlobalDefault: globalTrue, + }, } func ReplicationModeFromString(s string) (sessiondatapb.ReplicationMode, error) {