Skip to content

Commit

Permalink
opt: add setting to always use histograms to calculate stats
Browse files Browse the repository at this point in the history
Informs #64570

Release note (sql change): Added a new session setting,
optimizer_always_use_histograms, which ensures that the optimizer
always uses histograms when available to calculate the statistics
of every plan that it explores. Enabling this setting can prevent
the optimizer from choosing a suboptimal index when statistics for
a table are stale.
  • Loading branch information
rytaft committed Mar 8, 2023
1 parent cbdd452 commit 5c178e5
Show file tree
Hide file tree
Showing 10 changed files with 369 additions and 1 deletion.
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3298,6 +3298,10 @@ func (m *sessionDataMutator) SetOptimizerUseImprovedDisjunctionStats(val bool) {
m.data.OptimizerUseImprovedDisjunctionStats = val
}

// SetOptimizerAlwaysUseHistograms stores val in the session data's
// OptimizerAlwaysUseHistograms field, which backs the
// optimizer_always_use_histograms session setting.
func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) {
m.data.OptimizerAlwaysUseHistograms = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -4773,6 +4773,7 @@ null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer on
optimizer_always_use_histograms off
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats off
optimizer_use_multicol_stats on
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -4204,6 +4204,7 @@ node_id 1 NULL
null_ordered_last off NULL NULL NULL string
on_update_rehome_row_enabled on NULL NULL NULL string
opt_split_scan_limit 2048 NULL NULL NULL string
optimizer_always_use_histograms off NULL NULL NULL string
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_improved_disjunction_stats off NULL NULL NULL string
optimizer_use_multicol_stats on NULL NULL NULL string
Expand Down Expand Up @@ -4335,6 +4336,7 @@ node_id 1 NULL
null_ordered_last off NULL user NULL off off
on_update_rehome_row_enabled on NULL user NULL on on
opt_split_scan_limit 2048 NULL user NULL 2048 2048
optimizer_always_use_histograms off NULL user NULL off off
optimizer_use_histograms on NULL user NULL on on
optimizer_use_improved_disjunction_stats off NULL user NULL off off
optimizer_use_multicol_stats on NULL user NULL on on
Expand Down Expand Up @@ -4462,6 +4464,7 @@ null_ordered_last NULL NULL NULL
on_update_rehome_row_enabled NULL NULL NULL NULL NULL
opt_split_scan_limit NULL NULL NULL NULL NULL
optimizer NULL NULL NULL NULL NULL
optimizer_always_use_histograms NULL NULL NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL
optimizer_use_multicol_stats NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ node_id 1
null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer_always_use_histograms off
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats off
optimizer_use_multicol_stats on
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ type Memo struct {
testingOptimizerCostPerturbation float64
testingOptimizerDisableRuleProbability float64
useImprovedDisjunctionStats bool
alwaysUseHistograms bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -207,6 +208,7 @@ func (m *Memo) Init(evalCtx *tree.EvalContext) {
testingOptimizerCostPerturbation: evalCtx.SessionData().TestingOptimizerCostPerturbation,
testingOptimizerDisableRuleProbability: evalCtx.SessionData().TestingOptimizerDisableRuleProbability,
useImprovedDisjunctionStats: evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats,
alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms,
}
m.metadata.Init()
m.logPropsBuilder.init(evalCtx, m)
Expand Down Expand Up @@ -335,7 +337,8 @@ func (m *Memo) IsStale(
m.testingOptimizerRandomSeed != evalCtx.SessionData().TestingOptimizerRandomSeed ||
m.testingOptimizerCostPerturbation != evalCtx.SessionData().TestingOptimizerCostPerturbation ||
m.testingOptimizerDisableRuleProbability != evalCtx.SessionData().TestingOptimizerDisableRuleProbability ||
m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats {
m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats ||
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats = false
notStale()

// Stale optimizer_always_use_histograms.
evalCtx.SessionData().OptimizerAlwaysUseHistograms = true
stale()
evalCtx.SessionData().OptimizerAlwaysUseHistograms = false
notStale()

// Stale data sources and schema. Create new catalog so that data sources are
// recreated and can be modified independently.
catalog = testcat.New()
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2740,6 +2740,9 @@ func (sb *statisticsBuilder) finalizeFromRowCountAndDistinctCounts(
}

func (sb *statisticsBuilder) shouldUseHistogram(relProps *props.Relational) bool {
if sb.evalCtx.SessionData().OptimizerAlwaysUseHistograms {
return true
}
// If we know that the cardinality is below a certain threshold (e.g., due to
// a constraint on a key column), don't bother adding the overhead of
// creating a histogram.
Expand Down
Loading

0 comments on commit 5c178e5

Please sign in to comment.