diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index b3297511a589..ec5d3b332ee2 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3466,6 +3466,10 @@ func (m *sessionDataMutator) SetEnforceHomeRegionFollowerReadsEnabled(val bool) m.data.EnforceHomeRegionFollowerReadsEnabled = val } +func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) { + m.data.OptimizerAlwaysUseHistograms = val +} + // Utility functions related to scrubbing sensitive information on SQL Stats. // quantizeCounts ensures that the Count field in the diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index 606c3df46958..1bb09ce63b60 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -5016,6 +5016,7 @@ null_ordered_last off on_update_rehome_row_enabled on opt_split_scan_limit 2048 optimizer on +optimizer_always_use_histograms off optimizer_use_forecasts on optimizer_use_histograms on optimizer_use_improved_disjunction_stats on diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 3855ce84b459..1a4714bf4763 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -2647,6 +2647,7 @@ node_id 1 NULL null_ordered_last off NULL NULL NULL string on_update_rehome_row_enabled on NULL NULL NULL string opt_split_scan_limit 2048 NULL NULL NULL string +optimizer_always_use_histograms off NULL NULL NULL string optimizer_use_forecasts on NULL NULL NULL string optimizer_use_histograms on NULL NULL NULL string optimizer_use_improved_disjunction_stats on NULL NULL NULL string @@ -2794,6 +2795,7 @@ node_id 1 NULL null_ordered_last off NULL user NULL off off on_update_rehome_row_enabled on NULL user NULL on on opt_split_scan_limit 2048 NULL user NULL 2048 2048 +optimizer_always_use_histograms off NULL user NULL off off optimizer_use_forecasts on NULL user NULL on on optimizer_use_histograms on NULL user NULL on on optimizer_use_improved_disjunction_stats on NULL user NULL on on @@ -2940,6 +2942,7 @@ null_ordered_last NULL NULL NULL on_update_rehome_row_enabled NULL NULL NULL NULL NULL opt_split_scan_limit NULL NULL NULL NULL NULL optimizer NULL NULL NULL NULL NULL +optimizer_always_use_histograms NULL NULL NULL NULL NULL optimizer_use_forecasts NULL NULL NULL NULL NULL optimizer_use_histograms NULL NULL NULL NULL NULL optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 9baa3b0503e0..96b76a3cff2f 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -109,6 +109,7 @@ node_id 1 null_ordered_last off on_update_rehome_row_enabled on opt_split_scan_limit 2048 +optimizer_always_use_histograms off optimizer_use_forecasts on optimizer_use_histograms on optimizer_use_improved_disjunction_stats on diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index c345c029095f..e7a3935685bc 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -162,6 +162,7 @@ type Memo struct { useImprovedDisjunctionStats bool useLimitOrderingForStreamingGroupBy bool useImprovedSplitDisjunctionForJoins bool + alwaysUseHistograms bool // curRank is the highest currently in-use scalar expression rank. curRank opt.ScalarRank @@ -219,6 +220,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) { useImprovedDisjunctionStats: evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats, useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy, useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins, + alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms, } m.metadata.Init() m.logPropsBuilder.init(ctx, evalCtx, m) @@ -359,7 +361,8 @@ func (m *Memo) IsStale( m.allowOrdinalColumnReferences != evalCtx.SessionData().AllowOrdinalColumnReferences || m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats || m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy || - m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins { + m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins || + m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index 02a33bba4328..22497c47dae3 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -348,6 +348,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats = false notStale() + // Stale optimizer_always_use_histograms. + evalCtx.SessionData().OptimizerAlwaysUseHistograms = true + stale() + evalCtx.SessionData().OptimizerAlwaysUseHistograms = false + notStale() + // Stale data sources and schema. Create new catalog so that data sources are // recreated and can be modified independently. catalog = testcat.New() diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index 7f707b405f3f..bd59a6da8aa5 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -2815,6 +2815,9 @@ func (sb *statisticsBuilder) finalizeFromRowCountAndDistinctCounts( } func (sb *statisticsBuilder) shouldUseHistogram(relProps *props.Relational) bool { + if sb.evalCtx.SessionData().OptimizerAlwaysUseHistograms { + return true + } // If we know that the cardinality is below a certain threshold (e.g., due to // a constraint on a key column), don't bother adding the overhead of // creating a histogram. diff --git a/pkg/sql/opt/memo/testdata/stats/scan b/pkg/sql/opt/memo/testdata/stats/scan index a6ee4e300678..b9c35611dfa6 100644 --- a/pkg/sql/opt/memo/testdata/stats/scan +++ b/pkg/sql/opt/memo/testdata/stats/scan @@ -3139,3 +3139,330 @@ project │ └── fd: (1)-->(2,3) └── filters └── x:1 < 10 [type=bool, outer=(1), constraints=(/1: (/NULL - /9]; tight)] + +# Tests for when stats are stale. + +exec-ddl +CREATE TABLE stale ( + w STRING PRIMARY KEY, + x STRING, + y STRING, + z STRING, + UNIQUE (x, y), + INDEX (x, z) +) +---- + +exec-ddl +ALTER TABLE stale INJECT STATISTICS '[ + { + "avg_size": 7, + "columns": [ + "x" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 10, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo1" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo10" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo2" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo3" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo4" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo5" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo6" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo7" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo8" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo9" + } + ], + "histo_col_type": "STRING", + "histo_version": 2, + "null_count": 0, + "row_count": 110 + }, + { + "avg_size": 16, + "columns": [ + "x", + "y" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 110, + "histo_col_type": "", + "null_count": 0, + "row_count": 110 + }, + { + "avg_size": 13, + "columns": [ + "x", + "z" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 10, + "histo_col_type": "", + "null_count": 0, + "row_count": 110 + }, + { + "avg_size": 9, + "columns": [ + "y" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 11, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1000" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1001" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1002" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1003" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1004" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1005" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1006" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1007" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1008" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1009" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1010" + } + ], + "histo_col_type": "STRING", + "histo_version": 2, + "null_count": 0, + "row_count": 110 + }, + { + "avg_size": 7, + "columns": [ + "z" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 10, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz1" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz10" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz2" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz3" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz4" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz5" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz6" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz7" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz8" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz9" + } + ], + "histo_col_type": "STRING", + "histo_version": 2, + "null_count": 0, + "row_count": 110 + } +]' +---- + +# When optimizer_always_use_histograms is disabled, we may choose the non-unique +# index. +opt set=optimizer_always_use_histograms=false +SELECT * FROM stale WHERE x = 'bar1' AND (y = 'bar1000' OR y = 'bar1001') LIMIT 1 +---- +limit + ├── columns: w:1(string!null) x:2(string!null) y:3(string!null) z:4(string) + ├── cardinality: [0 - 1] + ├── stats: [rows=1] + ├── key: () + ├── fd: ()-->(1-4) + ├── select + │ ├── columns: w:1(string!null) x:2(string!null) y:3(string!null) z:4(string) + │ ├── cardinality: [0 - 2] + │ ├── stats: [rows=2, distinct(2)=1, null(2)=0, distinct(3)=2, null(3)=0, distinct(2,3)=2, null(2,3)=0] + │ ├── key: (1) + │ ├── fd: ()-->(2), (1)-->(3,4), (3)-->(1,4) + │ ├── limit hint: 1.00 + │ ├── index-join stale + │ │ ├── columns: w:1(string!null) x:2(string) y:3(string) z:4(string) + │ │ ├── stats: [rows=2.2e-08] + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(3,4), (2,3)~~>(1,4) + │ │ ├── limit hint: 0.00 + │ │ └── scan stale@stale_x_z_idx + │ │ ├── columns: w:1(string!null) x:2(string!null) z:4(string) + │ │ ├── constraint: /2/4/1: [/'bar1' - /'bar1'] + │ │ ├── stats: [rows=2.2e-08, distinct(2)=2.2e-08, null(2)=0] + │ │ │ histogram(2)= + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(4) + │ │ └── limit hint: 0.00 + │ └── filters + │ └── (y:3 = 'bar1000') OR (y:3 = 'bar1001') [type=bool, outer=(3), constraints=(/3: [/'bar1000' - /'bar1000'] [/'bar1001' - /'bar1001']; tight)] + └── 1 [type=int] + +# When optimizer_always_use_histograms is enabled, we should choose the unique +# index. +opt set=optimizer_always_use_histograms=true +SELECT * FROM stale WHERE x = 'bar1' AND (y = 'bar1000' OR y = 'bar1001') LIMIT 1 +---- +index-join stale + ├── columns: w:1(string!null) x:2(string!null) y:3(string!null) z:4(string) + ├── cardinality: [0 - 1] + ├── stats: [rows=2.2e-08] + ├── key: () + ├── fd: ()-->(1-4) + └── scan stale@stale_x_y_key + ├── columns: w:1(string!null) x:2(string!null) y:3(string!null) + ├── constraint: /2/3 + │ ├── [/'bar1'/'bar1000' - /'bar1'/'bar1000'] + │ └── [/'bar1'/'bar1001' - /'bar1'/'bar1001'] + ├── limit: 1 + ├── stats: [rows=2.2e-08] + ├── key: () + └── fd: ()-->(1-3) diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index 94b1b6a0efc8..400b4a9ce958 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -343,6 +343,9 @@ message LocalOnlySessionData { // follower reads to dynamically detect and report a query's home region // when the enforce_home_region session setting is also true. bool enforce_home_region_follower_reads_enabled = 93; + // OptimizerAlwaysUseHistograms, when true, ensures that the optimizer + // always uses histograms to calculate statistics if available. + bool optimizer_always_use_histograms = 94; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index 69d173439a83..fb20594ffdcd 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2515,6 +2515,23 @@ var varGen = map[string]sessionVar{ }, GlobalDefault: globalFalse, }, + + // CockroachDB extension. + `optimizer_always_use_histograms`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_always_use_histograms`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_always_use_histograms", s) + if err != nil { + return err + } + m.SetOptimizerAlwaysUseHistograms(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerAlwaysUseHistograms), nil + }, + GlobalDefault: globalFalse, + }, } // We want test coverage for this on and off so make it metamorphic.