Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-22.2: opt: add setting to always use histograms to calculate stats #98230

Merged
merged 1 commit into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3391,6 +3391,10 @@ func (m *sessionDataMutator) SetOptimizerUseImprovedSplitDisjunctionForJoins(val
m.data.OptimizerUseImprovedSplitDisjunctionForJoins = val
}

func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) {
m.data.OptimizerAlwaysUseHistograms = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -4773,6 +4773,7 @@ null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer on
optimizer_always_use_histograms off
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats off
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2806,6 +2806,7 @@ node_id 1 NULL
null_ordered_last off NULL NULL NULL string
on_update_rehome_row_enabled on NULL NULL NULL string
opt_split_scan_limit 2048 NULL NULL NULL string
optimizer_always_use_histograms off NULL NULL NULL string
optimizer_use_forecasts on NULL NULL NULL string
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_improved_disjunction_stats off NULL NULL NULL string
Expand Down Expand Up @@ -2947,6 +2948,7 @@ node_id 1 NULL
null_ordered_last off NULL user NULL off off
on_update_rehome_row_enabled on NULL user NULL on on
opt_split_scan_limit 2048 NULL user NULL 2048 2048
optimizer_always_use_histograms off NULL user NULL off off
optimizer_use_forecasts on NULL user NULL on on
optimizer_use_histograms on NULL user NULL on on
optimizer_use_improved_disjunction_stats off NULL user NULL off off
Expand Down Expand Up @@ -3086,6 +3088,7 @@ null_ordered_last NULL NULL NULL
on_update_rehome_row_enabled NULL NULL NULL NULL NULL
opt_split_scan_limit NULL NULL NULL NULL NULL
optimizer NULL NULL NULL NULL NULL
optimizer_always_use_histograms NULL NULL NULL NULL NULL
optimizer_use_forecasts NULL NULL NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ node_id 1
null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer_always_use_histograms off
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats off
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ type Memo struct {
useImprovedDisjunctionStats bool
useLimitOrderingForStreamingGroupBy bool
useImprovedSplitDisjunctionForJoins bool
alwaysUseHistograms bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -217,6 +218,7 @@ func (m *Memo) Init(evalCtx *eval.Context) {
useImprovedDisjunctionStats: evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats,
useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy,
useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins,
alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms,
}
m.metadata.Init()
m.logPropsBuilder.init(evalCtx, m)
Expand Down Expand Up @@ -356,7 +358,8 @@ func (m *Memo) IsStale(
m.variableInequalityLookupJoinEnabled != evalCtx.SessionData().VariableInequalityLookupJoinEnabled ||
m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats ||
m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy ||
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins {
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins ||
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats = false
notStale()

// Stale optimizer_always_use_histograms.
evalCtx.SessionData().OptimizerAlwaysUseHistograms = true
stale()
evalCtx.SessionData().OptimizerAlwaysUseHistograms = false
notStale()

// Stale data sources and schema. Create new catalog so that data sources are
// recreated and can be modified independently.
catalog = testcat.New()
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2808,6 +2808,9 @@ func (sb *statisticsBuilder) finalizeFromRowCountAndDistinctCounts(
}

func (sb *statisticsBuilder) shouldUseHistogram(relProps *props.Relational) bool {
if sb.evalCtx.SessionData().OptimizerAlwaysUseHistograms {
return true
}
// If we know that the cardinality is below a certain threshold (e.g., due to
// a constraint on a key column), don't bother adding the overhead of
// creating a histogram.
Expand Down
Loading