diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 0d0321548a62..47331e13a1f5 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3298,6 +3298,10 @@ func (m *sessionDataMutator) SetOptimizerUseImprovedDisjunctionStats(val bool) { m.data.OptimizerUseImprovedDisjunctionStats = val } +func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) { + m.data.OptimizerAlwaysUseHistograms = val +} + // Utility functions related to scrubbing sensitive information on SQL Stats. // quantizeCounts ensures that the Count field in the diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index 1c1da2946eda..c46b01b15c87 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -4773,6 +4773,7 @@ null_ordered_last off on_update_rehome_row_enabled on opt_split_scan_limit 2048 optimizer on +optimizer_always_use_histograms off optimizer_use_histograms on optimizer_use_improved_disjunction_stats off optimizer_use_multicol_stats on diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 1db1e8dff3fa..9703823f0d1f 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -4204,6 +4204,7 @@ node_id 1 NULL null_ordered_last off NULL NULL NULL string on_update_rehome_row_enabled on NULL NULL NULL string opt_split_scan_limit 2048 NULL NULL NULL string +optimizer_always_use_histograms off NULL NULL NULL string optimizer_use_histograms on NULL NULL NULL string optimizer_use_improved_disjunction_stats off NULL NULL NULL string optimizer_use_multicol_stats on NULL NULL NULL string @@ -4335,6 +4336,7 @@ node_id 1 NULL null_ordered_last off NULL user NULL off off on_update_rehome_row_enabled on NULL user NULL on on opt_split_scan_limit 2048 NULL user NULL 2048 2048 
+optimizer_always_use_histograms off NULL user NULL off off optimizer_use_histograms on NULL user NULL on on optimizer_use_improved_disjunction_stats off NULL user NULL off off optimizer_use_multicol_stats on NULL user NULL on on @@ -4462,6 +4464,7 @@ null_ordered_last NULL NULL NULL on_update_rehome_row_enabled NULL NULL NULL NULL NULL opt_split_scan_limit NULL NULL NULL NULL NULL optimizer NULL NULL NULL NULL NULL +optimizer_always_use_histograms NULL NULL NULL NULL NULL optimizer_use_histograms NULL NULL NULL NULL NULL optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL optimizer_use_multicol_stats NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 92b99b0cc68f..2df8990bd5ee 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -102,6 +102,7 @@ node_id 1 null_ordered_last off on_update_rehome_row_enabled on opt_split_scan_limit 2048 +optimizer_always_use_histograms off optimizer_use_histograms on optimizer_use_improved_disjunction_stats off optimizer_use_multicol_stats on diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index d181ad4a3ec6..529fd1cf1c56 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -29,10 +29,10 @@ import ( // called groups where each group contains a set of logically equivalent // expressions. Two expressions are considered logically equivalent if: // -// 1. They return the same number and data type of columns. However, order and -// naming of columns doesn't matter. -// 2. They return the same number of rows, with the same values in each row. -// However, order of rows doesn't matter. +// 1. They return the same number and data type of columns. However, order and +// naming of columns doesn't matter. +// 2. They return the same number of rows, with the same values in each row. +// However, order of rows doesn't matter. 
// // The different expressions in a single group are called memo expressions // (memo-ized expressions). The children of a memo expression can themselves be @@ -72,17 +72,17 @@ import ( // in-memory instance. This allows interned expressions to be checked for // equivalence by simple pointer comparison. For example: // -// SELECT * FROM a, b WHERE a.x = b.x +// SELECT * FROM a, b WHERE a.x = b.x // // After insertion into the memo, the memo would contain these six groups, with // numbers substituted for pointers to the normalized expression in each group: // -// G6: [inner-join [G1 G2 G5]] -// G5: [eq [G3 G4]] -// G4: [variable b.x] -// G3: [variable a.x] -// G2: [scan b] -// G1: [scan a] +// G6: [inner-join [G1 G2 G5]] +// G5: [eq [G3 G4]] +// G4: [variable b.x] +// G3: [variable a.x] +// G2: [scan b] +// G1: [scan a] // // Each leaf expressions is interned by hashing its operator type and any // private field values. Expressions higher in the tree can then rely on the @@ -98,12 +98,12 @@ import ( // added by the factory. For example, the join commutativity transformation // expands the memo like this: // -// G6: [inner-join [G1 G2 G5]] [inner-join [G2 G1 G5]] -// G5: [eq [G3 G4]] -// G4: [variable b.x] -// G3: [variable a.x] -// G2: [scan b] -// G1: [scan a] +// G6: [inner-join [G1 G2 G5]] [inner-join [G2 G1 G5]] +// G5: [eq [G3 G4]] +// G4: [variable b.x] +// G3: [variable a.x] +// G2: [scan b] +// G1: [scan a] // // See the comments in explorer.go for more details. type Memo struct { @@ -156,6 +156,7 @@ type Memo struct { testingOptimizerCostPerturbation float64 testingOptimizerDisableRuleProbability float64 useImprovedDisjunctionStats bool + alwaysUseHistograms bool // curRank is the highest currently in-use scalar expression rank. 
curRank opt.ScalarRank @@ -207,6 +208,7 @@ func (m *Memo) Init(evalCtx *tree.EvalContext) { testingOptimizerCostPerturbation: evalCtx.SessionData().TestingOptimizerCostPerturbation, testingOptimizerDisableRuleProbability: evalCtx.SessionData().TestingOptimizerDisableRuleProbability, useImprovedDisjunctionStats: evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats, + alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms, } m.metadata.Init() m.logPropsBuilder.init(evalCtx, m) @@ -298,14 +300,14 @@ func (m *Memo) HasPlaceholders() bool { // that takes into account the changes. IsStale checks the following // dependencies: // -// 1. Current database: this can change name resolution. -// 2. Current search path: this can change name resolution. -// 3. Current location: this determines time zone, and can change how time- -// related types are constructed and compared. -// 4. Data source schema: this determines most aspects of how the query is -// compiled. -// 5. Data source privileges: current user may no longer have access to one or -// more data sources. +// 1. Current database: this can change name resolution. +// 2. Current search path: this can change name resolution. +// 3. Current location: this determines time zone, and can change how time- +// related types are constructed and compared. +// 4. Data source schema: this determines most aspects of how the query is +// compiled. +// 5. Data source privileges: current user may no longer have access to one or +// more data sources. 
// // This function cannot swallow errors and return only a boolean, as it may // perform KV operations on behalf of the transaction associated with the @@ -335,7 +337,8 @@ func (m *Memo) IsStale( m.testingOptimizerRandomSeed != evalCtx.SessionData().TestingOptimizerRandomSeed || m.testingOptimizerCostPerturbation != evalCtx.SessionData().TestingOptimizerCostPerturbation || m.testingOptimizerDisableRuleProbability != evalCtx.SessionData().TestingOptimizerDisableRuleProbability || - m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats { + m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats || + m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index 8d74f02610d4..e50d48bed69d 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -302,6 +302,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats = false notStale() + // Stale optimizer_always_use_histograms. + evalCtx.SessionData().OptimizerAlwaysUseHistograms = true + stale() + evalCtx.SessionData().OptimizerAlwaysUseHistograms = false + notStale() + // Stale data sources and schema. Create new catalog so that data sources are // recreated and can be modified independently. 
catalog = testcat.New() diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index ff7464372062..68422b7feb39 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -2740,6 +2740,9 @@ func (sb *statisticsBuilder) finalizeFromRowCountAndDistinctCounts( } func (sb *statisticsBuilder) shouldUseHistogram(relProps *props.Relational) bool { + if sb.evalCtx.SessionData().OptimizerAlwaysUseHistograms { + return true + } // If we know that the cardinality is below a certain threshold (e.g., due to // a constraint on a key column), don't bother adding the overhead of // creating a histogram. diff --git a/pkg/sql/opt/memo/testdata/stats/scan b/pkg/sql/opt/memo/testdata/stats/scan index 26d7b9e8ca76..b9d993437cae 100644 --- a/pkg/sql/opt/memo/testdata/stats/scan +++ b/pkg/sql/opt/memo/testdata/stats/scan @@ -3083,3 +3083,330 @@ top-k │ <---- 3 -------- 5 ---- ├── key: (6,7) └── fd: ()-->(2,3,5), (6,7)-->(1,4) + +# Tests for when stats are stale: the queries filter on x = 'bar1', a value absent from the injected histogram on x.
+ +exec-ddl +CREATE TABLE stale ( + w STRING PRIMARY KEY, + x STRING, + y STRING, + z STRING, + UNIQUE (x, y), + INDEX (x, z) +) +---- + +exec-ddl +ALTER TABLE stale INJECT STATISTICS '[ + { + "avg_size": 7, + "columns": [ + "x" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 10, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo1" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo10" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo2" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo3" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo4" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo5" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo6" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo7" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo8" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "foo9" + } + ], + "histo_col_type": "STRING", + "histo_version": 2, + "null_count": 0, + "row_count": 110 + }, + { + "avg_size": 16, + "columns": [ + "x", + "y" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 110, + "histo_col_type": "", + "null_count": 0, + "row_count": 110 + }, + { + "avg_size": 13, + "columns": [ + "x", + "z" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 10, + "histo_col_type": "", + "null_count": 0, + "row_count": 110 + }, + { + "avg_size": 9, + "columns": [ + "y" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 11, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1000" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 
0, + "upper_bound": "bar1001" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1002" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1003" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1004" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1005" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1006" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1007" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1008" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1009" + }, + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "bar1010" + } + ], + "histo_col_type": "STRING", + "histo_version": 2, + "null_count": 0, + "row_count": 110 + }, + { + "avg_size": 7, + "columns": [ + "z" + ], + "created_at": "2023-03-08 01:51:41.258198", + "distinct_count": 10, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz1" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz10" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz2" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz3" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz4" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz5" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz6" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz7" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": "baz8" + }, + { + "distinct_range": 0, + "num_eq": 11, + "num_range": 0, + "upper_bound": 
"baz9" + } + ], + "histo_col_type": "STRING", + "histo_version": 2, + "null_count": 0, + "row_count": 110 + } +]' +---- + +# When optimizer_always_use_histograms is disabled, we may choose the non-unique +# index. +opt set=optimizer_always_use_histograms=false +SELECT * FROM stale WHERE x = 'bar1' AND (y = 'bar1000' OR y = 'bar1001') LIMIT 1 +---- +limit + ├── columns: w:1(string!null) x:2(string!null) y:3(string!null) z:4(string) + ├── cardinality: [0 - 1] + ├── stats: [rows=1] + ├── key: () + ├── fd: ()-->(1-4) + ├── select + │ ├── columns: w:1(string!null) x:2(string!null) y:3(string!null) z:4(string) + │ ├── cardinality: [0 - 2] + │ ├── stats: [rows=2, distinct(2)=1, null(2)=0, distinct(3)=2, null(3)=0, distinct(2,3)=2, null(2,3)=0] + │ ├── key: (1) + │ ├── fd: ()-->(2), (1)-->(3,4), (3)-->(1,4) + │ ├── limit hint: 1.00 + │ ├── index-join stale + │ │ ├── columns: w:1(string!null) x:2(string) y:3(string) z:4(string) + │ │ ├── stats: [rows=2.2e-08] + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(3,4), (2,3)~~>(1,4) + │ │ ├── limit hint: 0.00 + │ │ └── scan stale@stale_x_z_idx + │ │ ├── columns: w:1(string!null) x:2(string!null) z:4(string) + │ │ ├── constraint: /2/4/1: [/'bar1' - /'bar1'] + │ │ ├── stats: [rows=2.2e-08, distinct(2)=2.2e-08, null(2)=0] + │ │ │ histogram(2)= + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(4) + │ │ └── limit hint: 0.00 + │ └── filters + │ └── (y:3 = 'bar1000') OR (y:3 = 'bar1001') [type=bool, outer=(3), constraints=(/3: [/'bar1000' - /'bar1000'] [/'bar1001' - /'bar1001']; tight)] + └── 1 [type=int] + +# When optimizer_always_use_histograms is enabled, we should choose the unique +# index. 
+opt set=optimizer_always_use_histograms=true +SELECT * FROM stale WHERE x = 'bar1' AND (y = 'bar1000' OR y = 'bar1001') LIMIT 1 +---- +index-join stale + ├── columns: w:1(string!null) x:2(string!null) y:3(string!null) z:4(string) + ├── cardinality: [0 - 1] + ├── stats: [rows=2.2e-08] + ├── key: () + ├── fd: ()-->(1-4) + └── scan stale@stale_x_y_key + ├── columns: w:1(string!null) x:2(string!null) y:3(string!null) + ├── constraint: /2/3 + │ ├── [/'bar1'/'bar1000' - /'bar1'/'bar1000'] + │ └── [/'bar1'/'bar1001' - /'bar1'/'bar1001'] + ├── limit: 1 + ├── stats: [rows=2.2e-08] + ├── key: () + └── fd: ()-->(1-3) diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index ea9062df8e41..082e3dfc8b1e 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -279,6 +279,9 @@ message LocalOnlySessionData { // CopyFromRetriesEnabled controls whether retries should be internally // attempted for retriable errors. bool copy_from_retries_enabled = 89; + // OptimizerAlwaysUseHistograms, when true, makes the optimizer use available + // histograms for statistics even when a relation's known cardinality is small. + bool optimizer_always_use_histograms = 94; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index b973f4127346..d3165d897e7d 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2272,6 +2272,23 @@ var varGen = map[string]sessionVar{ }, GlobalDefault: globalFalse, }, + + // CockroachDB extension.
+ `optimizer_always_use_histograms`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_always_use_histograms`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_always_use_histograms", s) + if err != nil { + return err + } + m.SetOptimizerAlwaysUseHistograms(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerAlwaysUseHistograms), nil + }, + GlobalDefault: globalFalse, + }, } const compatErrMsg = "this parameter is currently recognized only for compatibility and has no effect in CockroachDB."