Skip to content

Commit

Permalink
opt: add session setting for join elimination optimization
Browse files Browse the repository at this point in the history
We recently added support for column remapping in the join elimination
rules that allows columns from the eliminated input of the join to be
mapped to equivalent columns from the preserved input. This allows
joins to be eliminated in more cases - in particular, the self-join
patterns that can arise from an `UPDATE ... FROM` statement where the
table in the `FROM` clause is the same as the table being updated.

This patch adds a setting, `optimizer_use_improved_join_elimination`,
which gates the column-remapping logic for the join-elimination rules.
The plan is to backport the column-remapping changes to 23.1 behind
this setting, with the setting turned off by default in the backport.

Informs cockroachdb#102614

Release note: None
  • Loading branch information
DrewKimball committed Jul 20, 2023
1 parent f360c1b commit 50adc77
Show file tree
Hide file tree
Showing 14 changed files with 95 additions and 7 deletions.
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3608,6 +3608,10 @@ func (m *sessionDataMutator) SetReplicationMode(val sessiondatapb.ReplicationMod
m.data.ReplicationMode = val
}

// SetOptimizerUseImprovedJoinElimination stores val in the session data's
// OptimizerUseImprovedJoinElimination field.
func (m *sessionDataMutator) SetOptimizerUseImprovedJoinElimination(val bool) {
m.data.OptimizerUseImprovedJoinElimination = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -5350,6 +5350,7 @@ optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_computed_column_filters_derivation on
optimizer_use_improved_disjunction_stats on
optimizer_use_improved_join_elimination on
optimizer_use_improved_split_disjunction_for_joins on
optimizer_use_limit_ordering_for_streaming_group_by on
optimizer_use_multicol_stats on
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2785,6 +2785,7 @@ optimizer_use_forecasts on N
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_improved_computed_column_filters_derivation on NULL NULL NULL string
optimizer_use_improved_disjunction_stats on NULL NULL NULL string
optimizer_use_improved_join_elimination on NULL NULL NULL string
optimizer_use_improved_split_disjunction_for_joins on NULL NULL NULL string
optimizer_use_limit_ordering_for_streaming_group_by on NULL NULL NULL string
optimizer_use_multicol_stats on NULL NULL NULL string
Expand Down Expand Up @@ -2944,6 +2945,7 @@ optimizer_use_forecasts on N
optimizer_use_histograms on NULL user NULL on on
optimizer_use_improved_computed_column_filters_derivation on NULL user NULL on on
optimizer_use_improved_disjunction_stats on NULL user NULL on on
optimizer_use_improved_join_elimination on NULL user NULL on on
optimizer_use_improved_split_disjunction_for_joins on NULL user NULL on on
optimizer_use_limit_ordering_for_streaming_group_by on NULL user NULL on on
optimizer_use_multicol_stats on NULL user NULL on on
Expand Down Expand Up @@ -3102,6 +3104,7 @@ optimizer_use_forecasts NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_improved_computed_column_filters_derivation NULL NULL NULL NULL NULL
optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL
optimizer_use_improved_join_elimination NULL NULL NULL NULL NULL
optimizer_use_improved_split_disjunction_for_joins NULL NULL NULL NULL NULL
optimizer_use_limit_ordering_for_streaming_group_by NULL NULL NULL NULL NULL
optimizer_use_multicol_stats NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_computed_column_filters_derivation on
optimizer_use_improved_disjunction_stats on
optimizer_use_improved_join_elimination on
optimizer_use_improved_split_disjunction_for_joins on
optimizer_use_limit_ordering_for_streaming_group_by on
optimizer_use_multicol_stats on
Expand Down
7 changes: 5 additions & 2 deletions pkg/sql/opt/memo/logical_props_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2258,6 +2258,7 @@ func addOuterColsToFuncDep(outerCols opt.ColSet, fdset *props.FuncDepSet) {
// joins that are used internally when deriving logical properties and
// statistics.
type joinPropsHelper struct {
evalCtx *eval.Context
join RelExpr
joinType opt.Operator

Expand All @@ -2276,7 +2277,7 @@ type joinPropsHelper struct {
func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) {
// This initialization pattern ensures that fields are not unwittingly
// reused. Field reuse must be explicit.
*h = joinPropsHelper{join: joinExpr}
*h = joinPropsHelper{evalCtx: b.evalCtx, join: joinExpr}

switch join := joinExpr.(type) {
case *LookupJoinExpr:
Expand Down Expand Up @@ -2514,7 +2515,9 @@ func (h *joinPropsHelper) setFuncDeps(rel *props.Relational) {
// created new possibilities for simplifying removed columns.
rel.FuncDeps.ProjectCols(rel.OutputCols)
}
h.addSelfJoinImpliedFDs(rel)
if h.evalCtx.SessionData().OptimizerUseImprovedJoinElimination {
h.addSelfJoinImpliedFDs(rel)
}
}

// addSelfJoinImpliedFDs adds any extra equality FDs that are implied by a self
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ type Memo struct {
alwaysUseHistograms bool
hoistUncorrelatedEqualitySubqueries bool
useImprovedComputedColumnFiltersDerivation bool
useImprovedJoinElimination bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -228,6 +229,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms,
hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries,
useImprovedComputedColumnFiltersDerivation: evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation,
useImprovedJoinElimination: evalCtx.SessionData().OptimizerUseImprovedJoinElimination,
}
m.metadata.Init()
m.logPropsBuilder.init(ctx, evalCtx, m)
Expand Down Expand Up @@ -372,7 +374,8 @@ func (m *Memo) IsStale(
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins ||
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms ||
m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries ||
m.useImprovedComputedColumnFiltersDerivation != evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation {
m.useImprovedComputedColumnFiltersDerivation != evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation ||
m.useImprovedJoinElimination != evalCtx.SessionData().OptimizerUseImprovedJoinElimination {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation = false
notStale()

// Stale optimizer_use_improved_join_elimination.
evalCtx.SessionData().OptimizerUseImprovedJoinElimination = true
stale()
evalCtx.SessionData().OptimizerUseImprovedJoinElimination = false
notStale()

// User no longer has access to view.
catalog.View(tree.NewTableNameWithSchema("t", catconstants.PublicSchemaName, "abcview")).Revoked = true
_, err = o.Memo().IsStale(ctx, &evalCtx, catalog)
Expand Down
7 changes: 7 additions & 0 deletions pkg/sql/opt/norm/project_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -908,3 +908,10 @@ func (c *CustomFuncs) RemapProjectionCols(
}
return *(replace(&projections).(*memo.ProjectionsExpr))
}

// CanUseImprovedJoinElimination reports whether a join-elimination rule may
// proceed. When the OptimizerUseImprovedJoinElimination session setting is
// enabled it always returns true. Otherwise it returns true only when every
// column in "from" is already contained in "to", i.e. no column remapping is
// required in order to eliminate the join.
func (c *CustomFuncs) CanUseImprovedJoinElimination(from, to opt.ColSet) bool {
	if c.f.evalCtx.SessionData().OptimizerUseImprovedJoinElimination {
		return true
	}
	return from.SubsetOf(to)
}
2 changes: 2 additions & 0 deletions pkg/sql/opt/norm/rules/groupby.opt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
$leftCols
$fds:(FuncDeps $input)
) &
(CanUseImprovedJoinElimination $toRemap $leftCols) &
(CanEliminateJoinUnderGroupByLeft $input $aggs)
)
=>
Expand Down Expand Up @@ -114,6 +115,7 @@
$rightCols
$fds:(FuncDeps $input)
) &
(CanUseImprovedJoinElimination $toRemap $rightCols) &
(CanEliminateJoinUnderGroupByRight $input $aggs)
)
=>
Expand Down
10 changes: 6 additions & 4 deletions pkg/sql/opt/norm/rules/project.opt
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@
$projections:*
$passthrough:* &
(CanRemapCols
(UnionCols
$fromCols:(UnionCols
$passthrough
(ProjectionOuterCols $projections)
)
$leftCols:(OutputCols $left)
$fds:(FuncDeps $join)
)
) &
(CanUseImprovedJoinElimination $fromCols $leftCols)
)
=>
(Project
Expand All @@ -56,13 +57,14 @@
$projections:*
$passthrough:* &
(CanRemapCols
(UnionCols
$fromCols:(UnionCols
$passthrough
(ProjectionOuterCols $projections)
)
$rightCols:(OutputCols $right)
$fds:(FuncDeps $join)
)
) &
(CanUseImprovedJoinElimination $fromCols $rightCols)
)
=>
(Project
Expand Down
34 changes: 34 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/project
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,40 @@ project
└── projections
└── child.parent_id:2 [as=parent_id:9, outer=(2)]

# With OptimizerUseImprovedJoinElimination disabled, the join can still be
# eliminated as long as doing so doesn't require column remapping.
norm set=optimizer_use_improved_join_elimination=false expect=EliminateJoinUnderProjectLeft
SELECT b.x, b.z FROM b LEFT JOIN a ON b.x = a.x
----
scan b
├── columns: x:1!null z:2
├── key: (1)
└── fd: (1)-->(2)

# The join cannot be eliminated with OptimizerUseImprovedJoinElimination
# disabled because eliminating it would require column remapping.
norm set=optimizer_use_improved_join_elimination=false expect-not=EliminateJoinUnderProjectLeft
SELECT b.j, b1.x FROM b INNER JOIN b AS b1 ON b.x = b1.x
----
project
├── columns: j:3 x:6!null
├── key: (6)
├── fd: (6)-->(3)
└── inner-join (hash)
├── columns: b.x:1!null b.j:3 b1.x:6!null
├── multiplicity: left-rows(exactly-one), right-rows(exactly-one)
├── key: (6)
├── fd: (1)-->(3), (1)==(6), (6)==(1)
├── scan b
│ ├── columns: b.x:1!null b.j:3
│ ├── key: (1)
│ └── fd: (1)-->(3)
├── scan b [as=b1]
│ ├── columns: b1.x:6!null
│ └── key: (6)
└── filters
└── b.x:1 = b1.x:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]

# --------------------------------------------------
# EliminateJoinUnderProjectRight
# --------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/testutils/opttester/opt_tester.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ func New(catalog cat.Catalog, sql string) *OptTester {
ot.evalCtx.SessionData().OptimizerAlwaysUseHistograms = true
ot.evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = true
ot.evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation = true
ot.evalCtx.SessionData().OptimizerUseImprovedJoinElimination = true

return ot
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/sessiondatapb/local_only_session_data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,10 @@ message LocalOnlySessionData {
// ReplicationMode represents the replication parameter passed in during
// connection time.
ReplicationMode replication_mode = 106;
// OptimizerUseImprovedJoinElimination, when true, allows the optimizer to
// eliminate joins in more cases by remapping columns from the eliminated
// input of the join to equivalent columns from the preserved input.
bool optimizer_use_improved_join_elimination = 107;

///////////////////////////////////////////////////////////////////////////
// WARNING: consider whether a session parameter you're adding needs to //
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -2800,6 +2800,23 @@ var varGen = map[string]sessionVar{
return strconv.FormatInt(maxConn, 10), nil
},
},

// CockroachDB extension.
`optimizer_use_improved_join_elimination`: {
GetStringVal: makePostgresBoolGetStringValFn(`optimizer_use_improved_join_elimination`),
Set: func(_ context.Context, m sessionDataMutator, s string) error {
b, err := paramparse.ParseBoolVar("optimizer_use_improved_join_elimination", s)
if err != nil {
return err
}
m.SetOptimizerUseImprovedJoinElimination(b)
return nil
},
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerUseImprovedJoinElimination), nil
},
GlobalDefault: globalTrue,
},
}

func ReplicationModeFromString(s string) (sessiondatapb.ReplicationMode, error) {
Expand Down

0 comments on commit 50adc77

Please sign in to comment.