Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql/stats: remove NumRange-stealing behavior from histogram prediction #86348

Merged
merged 1 commit into from
Aug 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 138 additions & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/forecast
Original file line number Diff line number Diff line change
Expand Up @@ -592,8 +592,145 @@ scan c
├── columns: h:1
├── constraint: /1: [/'1988-08-07 00:00:00+00:00' - ]
├── stats: [rows=24, distinct(1)=24, null(1)=0, avgsize(1)=7]
│ histogram(1)= 0 1 5 1 5 1 5 1 4 1
│ histogram(1)= 0 1 5 1 5 1 5 1 5 0
│ <--- '1988-08-07 00:00:00+00:00' --- '1988-08-07 06:00:00+00:00' --- '1988-08-07 12:00:00+00:00' --- '1988-08-07 18:00:00+00:00' --- '1988-08-08 00:00:00+00:00'
├── cost: 39.7
├── key: (1)
└── distribution: test

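# Test for issue 86344: the forecasted histogram for table x is expected to
# keep num_eq at 0 in every bucket, with num_range unchanged at 2.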

statement ok
CREATE TABLE x (a INT PRIMARY KEY) WITH (sql_stats_automatic_collection_enabled = false);

statement ok
ALTER TABLE x INJECT STATISTICS '[
{
"avg_size": 1,
"columns": [
"a"
],
"created_at": "2020-03-13 00:00:00.000000",
"distinct_count": 4,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "4"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "7"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "10"
}
],
"histo_col_type": "INT8",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 4
},
{
"avg_size": 1,
"columns": [
"a"
],
"created_at": "2020-03-14 00:00:00.000000",
"distinct_count": 4,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "7"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "10"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "13"
}
],
"histo_col_type": "INT8",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 4
},
{
"avg_size": 1,
"columns": [
"a"
],
"created_at": "2020-03-15 00:00:00.000000",
"distinct_count": 4,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "10"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "13"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "16"
}
],
"histo_col_type": "INT8",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 4
}
]';

query T
SELECT jsonb_pretty(stat->'histo_buckets')
FROM (
SELECT jsonb_array_elements(statistics) AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE x WITH FORECAST]
)
WHERE stat->>'name' = '__forecast__';
----
[
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "13"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "16"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "19"
}
]
10 changes: 5 additions & 5 deletions pkg/sql/stats/forecast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -490,22 +490,22 @@ func TestForecastColumnStatistics(t *testing.T) {
hist: testHistogram{},
},
{
at: 2, row: 5, dist: 2, null: 3, size: 2,
at: 2, row: 5, dist: 3, null: 3, size: 2,
hist: testHistogram{{1, 0, 0, 200}, {0, 1, 1, 800}},
},
{
at: 3, row: 7, dist: 3, null: 3, size: 2,
at: 3, row: 7, dist: 4, null: 3, size: 2,
hist: testHistogram{{2, 0, 0, 200}, {0, 2, 2, 800}},
},
{
at: 4, row: 9, dist: 4, null: 3, size: 2,
at: 4, row: 9, dist: 5, null: 3, size: 2,
hist: testHistogram{{3, 0, 0, 200}, {0, 3, 3, 800}},
},
},
at: 5,
forecast: &testStat{
at: 5, row: 11, dist: 5, null: 3, size: 2,
hist: testHistogram{{4, 0, 0, 200}, {1, 3, 2, 800}},
at: 5, row: 11, dist: 6, null: 3, size: 2,
hist: testHistogram{{4, 0, 0, 200}, {0, 4, 4, 800}},
},
},
// Histogram, constant numbers but changing shape
Expand Down
9 changes: 0 additions & 9 deletions pkg/sql/stats/quantile.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,15 +282,6 @@ func (q quantile) toHistogram(colType *types.T, rowCount float64) (histogram, er
if !isValidCount(numEq) {
return errors.AssertionFailedf("invalid histogram NumEq: %v", numEq)
}
if numEq < 1 && currentBucket.NumRange+numEq >= 2 {
// Steal from NumRange so that NumEq is at least 1, if it wouldn't make
// NumRange 0. This makes the histogram look more like something
// EquiDepthHistogram would produce.
// TODO(michae2): Consider removing this logic if statistics_builder
// doesn't need it.
currentBucket.NumRange -= 1 - numEq
numEq = 1
}
currentBucket.NumEq = numEq

// Calculate DistinctRange for this bucket now that NumRange is finalized.
Expand Down
16 changes: 8 additions & 8 deletions pkg/sql/stats/quantile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,9 +479,9 @@ func TestQuantileToHistogram(t *testing.T) {
hist: testHistogram{{0, 0, 0, 0}, {1, 1, 1, 100}},
},
{
qfun: quantile{{0, 0}, {0.9, 100}, {1, 100}},
rows: 10,
hist: testHistogram{{0, 0, 0, 0}, {1, 9, 9, 100}},
qfun: quantile{{0, 0}, {0.9375, 100}, {1, 100}},
rows: 16,
hist: testHistogram{{0, 0, 0, 0}, {1, 15, 15, 100}},
},
{
qfun: quantile{{0, 100}, {0.25, 100}, {0.75, 200}, {1, 200}},
Expand All @@ -503,26 +503,26 @@ func TestQuantileToHistogram(t *testing.T) {
rows: 32,
hist: testHistogram{{4, 0, 0, 310}, {4, 0, 0, 320}, {8, 0, 0, 330}, {4, 0, 0, 340}, {4, 0, 0, 350}, {4, 0, 0, 360}, {4, 0, 0, 370}},
},
// Cases where we steal a row from NumRange to give to NumEq.
// Cases with 0 NumEq.
{
qfun: quantile{{0, 0}, {1, 100}},
rows: 2,
hist: testHistogram{{0, 0, 0, 0}, {1, 1, 1, 100}},
hist: testHistogram{{0, 0, 0, 0}, {0, 2, 2, 100}},
},
{
qfun: quantile{{0, 100}, {0.5, 100}, {1, 200}, {1, 300}},
rows: 4,
hist: testHistogram{{2, 0, 0, 100}, {1, 1, 1, 200}},
hist: testHistogram{{2, 0, 0, 100}, {0, 2, 2, 200}},
},
{
qfun: quantile{{0, 0}, {0.875, 87.5}, {1, 100}},
rows: 8,
hist: testHistogram{{0, 0, 0, 0}, {1, 6, 6, 87.5}, {0, 1, 1, 100}},
hist: testHistogram{{0, 0, 0, 0}, {0, 7, 7, 87.5}, {0, 1, 1, 100}},
},
{
qfun: quantile{{0, 400}, {0.5, 600}, {0.75, 700}, {1, 800}},
rows: 16,
hist: testHistogram{{0, 0, 0, 400}, {1, 7, 7, 600}, {1, 3, 3, 700}, {1, 3, 3, 800}},
hist: testHistogram{{0, 0, 0, 400}, {0, 8, 8, 600}, {0, 4, 4, 700}, {0, 4, 4, 800}},
},
// Error cases.
{
Expand Down