Skip to content

Commit

Permalink
Merge #98194
Browse files Browse the repository at this point in the history
98194: opt: add setting to always use histograms to calculate stats r=rytaft a=rytaft

Informs #64570

Release note (sql change): Added a new session setting, `optimizer_always_use_histograms`, which ensures that the optimizer always uses histograms when available to calculate the statistics of every plan that it explores. Enabling this setting can prevent the optimizer from choosing a suboptimal index when statistics for a table are stale.

Co-authored-by: Rebecca Taft <[email protected]>
  • Loading branch information
craig[bot] and rytaft committed Mar 8, 2023
2 parents 1b162d1 + ef1604f commit 329a232
Show file tree
Hide file tree
Showing 10 changed files with 369 additions and 1 deletion.
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3466,6 +3466,10 @@ func (m *sessionDataMutator) SetEnforceHomeRegionFollowerReadsEnabled(val bool)
m.data.EnforceHomeRegionFollowerReadsEnabled = val
}

// SetOptimizerAlwaysUseHistograms stores val in the
// OptimizerAlwaysUseHistograms field of the session data held by the mutator.
func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) {
m.data.OptimizerAlwaysUseHistograms = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -5016,6 +5016,7 @@ null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer on
optimizer_always_use_histograms off
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2647,6 +2647,7 @@ node_id 1 NULL
null_ordered_last off NULL NULL NULL string
on_update_rehome_row_enabled on NULL NULL NULL string
opt_split_scan_limit 2048 NULL NULL NULL string
optimizer_always_use_histograms off NULL NULL NULL string
optimizer_use_forecasts on NULL NULL NULL string
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_improved_disjunction_stats on NULL NULL NULL string
Expand Down Expand Up @@ -2794,6 +2795,7 @@ node_id 1 NULL
null_ordered_last off NULL user NULL off off
on_update_rehome_row_enabled on NULL user NULL on on
opt_split_scan_limit 2048 NULL user NULL 2048 2048
optimizer_always_use_histograms off NULL user NULL off off
optimizer_use_forecasts on NULL user NULL on on
optimizer_use_histograms on NULL user NULL on on
optimizer_use_improved_disjunction_stats on NULL user NULL on on
Expand Down Expand Up @@ -2940,6 +2942,7 @@ null_ordered_last NULL NULL NULL
on_update_rehome_row_enabled NULL NULL NULL NULL NULL
opt_split_scan_limit NULL NULL NULL NULL NULL
optimizer NULL NULL NULL NULL NULL
optimizer_always_use_histograms NULL NULL NULL NULL NULL
optimizer_use_forecasts NULL NULL NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ node_id 1
null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer_always_use_histograms off
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ type Memo struct {
useImprovedDisjunctionStats bool
useLimitOrderingForStreamingGroupBy bool
useImprovedSplitDisjunctionForJoins bool
alwaysUseHistograms bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -219,6 +220,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
useImprovedDisjunctionStats: evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats,
useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy,
useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins,
alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms,
}
m.metadata.Init()
m.logPropsBuilder.init(ctx, evalCtx, m)
Expand Down Expand Up @@ -359,7 +361,8 @@ func (m *Memo) IsStale(
m.allowOrdinalColumnReferences != evalCtx.SessionData().AllowOrdinalColumnReferences ||
m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats ||
m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy ||
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins {
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins ||
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats = false
notStale()

// Stale optimizer_always_use_histograms.
evalCtx.SessionData().OptimizerAlwaysUseHistograms = true
stale()
evalCtx.SessionData().OptimizerAlwaysUseHistograms = false
notStale()

// Stale data sources and schema. Create new catalog so that data sources are
// recreated and can be modified independently.
catalog = testcat.New()
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2815,6 +2815,9 @@ func (sb *statisticsBuilder) finalizeFromRowCountAndDistinctCounts(
}

func (sb *statisticsBuilder) shouldUseHistogram(relProps *props.Relational) bool {
if sb.evalCtx.SessionData().OptimizerAlwaysUseHistograms {
return true
}
// If we know that the cardinality is below a certain threshold (e.g., due to
// a constraint on a key column), don't bother adding the overhead of
// creating a histogram.
Expand Down
Loading

0 comments on commit 329a232

Please sign in to comment.