From 98d1e0c74770a6fa6c932877ec9177bf0e58019a Mon Sep 17 00:00:00 2001 From: Michael Erickson Date: Wed, 17 Aug 2022 16:11:04 -0700 Subject: [PATCH] sql/stats: remove NumRange-stealing behavior from histogram prediction We should be able to handle NumEq=0 just fine everywhere that uses histograms, so delete this NumRange-stealing code. Fixes: #86344 Release justification: low-risk updates to new functionality. Release note: None --- .../opt/exec/execbuilder/testdata/forecast | 139 +++++++++++++++++- pkg/sql/stats/forecast_test.go | 10 +- pkg/sql/stats/quantile.go | 9 -- pkg/sql/stats/quantile_test.go | 16 +- 4 files changed, 151 insertions(+), 23 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/forecast b/pkg/sql/opt/exec/execbuilder/testdata/forecast index 17d009ac4d28..8d5d5b71efbe 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/forecast +++ b/pkg/sql/opt/exec/execbuilder/testdata/forecast @@ -592,8 +592,145 @@ scan c ├── columns: h:1 ├── constraint: /1: [/'1988-08-07 00:00:00+00:00' - ] ├── stats: [rows=24, distinct(1)=24, null(1)=0, avgsize(1)=7] - │ histogram(1)= 0 1 5 1 5 1 5 1 4 1 + │ histogram(1)= 0 1 5 1 5 1 5 1 5 0 │ <--- '1988-08-07 00:00:00+00:00' --- '1988-08-07 06:00:00+00:00' --- '1988-08-07 12:00:00+00:00' --- '1988-08-07 18:00:00+00:00' --- '1988-08-08 00:00:00+00:00' ├── cost: 39.7 ├── key: (1) └── distribution: test + +# Test for issue 86344. + +statement ok +CREATE TABLE x (a INT PRIMARY KEY) WITH (sql_stats_automatic_collection_enabled = false); + +statement ok +ALTER TABLE x INJECT STATISTICS '[ + { + "avg_size": 1, + "columns": [ + "a" + ], + "created_at": "2020-03-13 00:00:00.000000", + "distinct_count": 4, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "4" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "7" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "10" + } + ], + "histo_col_type": "INT8", + "histo_version": 2, + "name": "__auto__", + "null_count": 0, + "row_count": 4 + }, + { + "avg_size": 1, + "columns": [ + "a" + ], + "created_at": "2020-03-14 00:00:00.000000", + "distinct_count": 4, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "7" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "10" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "13" + } + ], + "histo_col_type": "INT8", + "histo_version": 2, + "name": "__auto__", + "null_count": 0, + "row_count": 4 + }, + { + "avg_size": 1, + "columns": [ + "a" + ], + "created_at": "2020-03-15 00:00:00.000000", + "distinct_count": 4, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "10" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "13" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "16" + } + ], + "histo_col_type": "INT8", + "histo_version": 2, + "name": "__auto__", + "null_count": 0, + "row_count": 4 + } +]'; + +query T +SELECT jsonb_pretty(stat->'histo_buckets') +FROM ( + SELECT jsonb_array_elements(statistics) AS stat + FROM [SHOW STATISTICS USING JSON FOR TABLE x WITH FORECAST] +) +WHERE stat->>'name' = '__forecast__'; +---- +[ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "13" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "16" + }, + { + "distinct_range": 2, + "num_eq": 0, + "num_range": 2, + "upper_bound": "19" + } +] diff --git a/pkg/sql/stats/forecast_test.go b/pkg/sql/stats/forecast_test.go index 65d384711964..271461e6b80c 100644 --- a/pkg/sql/stats/forecast_test.go +++ b/pkg/sql/stats/forecast_test.go @@ -490,22 +490,22 @@ func TestForecastColumnStatistics(t *testing.T) { hist: testHistogram{}, }, { - at: 2, row: 5, dist: 2, null: 3, size: 2, + at: 2, row: 5, dist: 3, null: 3, size: 2, hist: testHistogram{{1, 0, 0, 200}, {0, 1, 1, 800}}, }, { - at: 3, row: 7, dist: 3, null: 3, size: 2, + at: 3, row: 7, dist: 4, null: 3, size: 2, hist: testHistogram{{2, 0, 0, 200}, {0, 2, 2, 800}}, }, { - at: 4, row: 9, dist: 4, null: 3, size: 2, + at: 4, row: 9, dist: 5, null: 3, size: 2, hist: testHistogram{{3, 0, 0, 200}, {0, 3, 3, 800}}, }, }, at: 5, forecast: &testStat{ - at: 5, row: 11, dist: 5, null: 3, size: 2, - hist: testHistogram{{4, 0, 0, 200}, {1, 3, 2, 800}}, + at: 5, row: 11, dist: 6, null: 3, size: 2, + hist: testHistogram{{4, 0, 0, 200}, {0, 4, 4, 800}}, }, }, // Histogram, constant numbers but changing shape diff --git a/pkg/sql/stats/quantile.go b/pkg/sql/stats/quantile.go index 2e69d6cd154c..e8f68d5e8a66 100644 --- a/pkg/sql/stats/quantile.go +++ b/pkg/sql/stats/quantile.go @@ -282,15 +282,6 @@ func (q quantile) toHistogram(colType *types.T, rowCount float64) (histogram, er if !isValidCount(numEq) { return errors.AssertionFailedf("invalid histogram NumEq: %v", numEq) } - if numEq < 1 && currentBucket.NumRange+numEq >= 2 { - // Steal from NumRange so that NumEq is at least 1, if it wouldn't make - // NumRange 0. This makes the histogram look more like something - // EquiDepthHistogram would produce. - // TODO(michae2): Consider removing this logic if statistics_builder - // doesn't need it. - currentBucket.NumRange -= 1 - numEq - numEq = 1 - } currentBucket.NumEq = numEq // Calculate DistinctRange for this bucket now that NumRange is finalized. diff --git a/pkg/sql/stats/quantile_test.go b/pkg/sql/stats/quantile_test.go index 48381cd1898f..320d5b3dc4ae 100644 --- a/pkg/sql/stats/quantile_test.go +++ b/pkg/sql/stats/quantile_test.go @@ -479,9 +479,9 @@ func TestQuantileToHistogram(t *testing.T) { hist: testHistogram{{0, 0, 0, 0}, {1, 1, 1, 100}}, }, { - qfun: quantile{{0, 0}, {0.9, 100}, {1, 100}}, - rows: 10, - hist: testHistogram{{0, 0, 0, 0}, {1, 9, 9, 100}}, + qfun: quantile{{0, 0}, {0.9375, 100}, {1, 100}}, + rows: 16, + hist: testHistogram{{0, 0, 0, 0}, {1, 15, 15, 100}}, }, { qfun: quantile{{0, 100}, {0.25, 100}, {0.75, 200}, {1, 200}}, @@ -503,26 +503,26 @@ func TestQuantileToHistogram(t *testing.T) { rows: 32, hist: testHistogram{{4, 0, 0, 310}, {4, 0, 0, 320}, {8, 0, 0, 330}, {4, 0, 0, 340}, {4, 0, 0, 350}, {4, 0, 0, 360}, {4, 0, 0, 370}}, }, - // Cases where we steal a row from NumRange to give to NumEq. + // Cases with 0 NumEq. { qfun: quantile{{0, 0}, {1, 100}}, rows: 2, - hist: testHistogram{{0, 0, 0, 0}, {1, 1, 1, 100}}, + hist: testHistogram{{0, 0, 0, 0}, {0, 2, 2, 100}}, }, { qfun: quantile{{0, 100}, {0.5, 100}, {1, 200}, {1, 300}}, rows: 4, - hist: testHistogram{{2, 0, 0, 100}, {1, 1, 1, 200}}, + hist: testHistogram{{2, 0, 0, 100}, {0, 2, 2, 200}}, }, { qfun: quantile{{0, 0}, {0.875, 87.5}, {1, 100}}, rows: 8, - hist: testHistogram{{0, 0, 0, 0}, {1, 6, 6, 87.5}, {0, 1, 1, 100}}, + hist: testHistogram{{0, 0, 0, 0}, {0, 7, 7, 87.5}, {0, 1, 1, 100}}, }, { qfun: quantile{{0, 400}, {0.5, 600}, {0.75, 700}, {1, 800}}, rows: 16, - hist: testHistogram{{0, 0, 0, 400}, {1, 7, 7, 600}, {1, 3, 3, 700}, {1, 3, 3, 800}}, + hist: testHistogram{{0, 0, 0, 400}, {0, 8, 8, 600}, {0, 4, 4, 700}, {0, 4, 4, 800}}, }, // Error cases. {