Skip to content

Commit

Permalink
Merge #100763
Browse files Browse the repository at this point in the history
100763: opt: fix histogram filtering with infinite upper bound r=mgartner a=mgartner

This commit fixes a bug that caused the filtering a histogram bucket
with an infinite upper bound to produce a bucket with a `numRange` of
`NaN`. This led to bad query plans because the `NaN` did not reduce the
input expression's row count at all.

Fixes #100582

Release note (bug fix): A bug has been fixed that caused suboptimal
query plans when a table contained infinite values, like
`'+Infinity'::DECIMAL`. This bug is present since versions v22.1, and
likely earlier. It could also be triggered in rare cases when table
statistics forecasts created a forecasted bucket with an infinite value.


Co-authored-by: Marcus Gartner <[email protected]>
  • Loading branch information
craig[bot] and mgartner committed Apr 10, 2023
2 parents 9ce0ac2 + 7b8ad4c commit 82e09ed
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 6 deletions.
66 changes: 66 additions & 0 deletions pkg/sql/opt/memo/testdata/stats/select
Original file line number Diff line number Diff line change
Expand Up @@ -3580,6 +3580,72 @@ project
│ └── b:3 = 1 [type=bool, outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)]
└── filters (true)

# Regression test for #100582. A bucket with an infinite upper bound should not
# create NaNs in filtered histograms.
exec-ddl
CREATE TABLE t100582 (
id INT PRIMARY KEY,
s STRING NULL,
d DECIMAL NULL
);
----

exec-ddl
ALTER TABLE t100582 INJECT STATISTICS '[
{
"columns": ["d"],
"row_count": 3,
"distinct_count": 3,
"null_count": 0,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "0"
},
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "1"
},
{
"distinct_range": 1,
"num_eq": 0,
"num_range": 1,
"upper_bound": "Infinity"
}
],
"histo_col_type": "DECIMAL",
"created_at": "2023-04-04 07:47:43.132805",
"histo_version": 2
}
]'
----

# The filtered histogram should not have a numRange of NaN.
norm
SELECT s FROM t100582 WHERE d >= 2;
----
project
├── columns: s:2(string)
├── immutable
├── stats: [rows=0.75]
└── select
├── columns: s:2(string) d:3(decimal!null)
├── immutable
├── stats: [rows=0.75, distinct(3)=0.5, null(3)=0]
│ histogram(3)= 0 0 0.5 0
│ <--- 2 ----- Infinity
├── scan t100582
│ ├── columns: s:2(string) d:3(decimal)
│ └── stats: [rows=3, distinct(3)=3, null(3)=0]
│ histogram(3)= 0 0 0 1 1 0
│ <--- 0 --- 1 --- Infinity
└── filters
└── d:3 >= 2 [type=bool, outer=(3), immutable, constraints=(/3: [/2 - ]; tight)]

# Tests for selectivity of disjunctions

exec-ddl
Expand Down
19 changes: 13 additions & 6 deletions pkg/sql/opt/props/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,14 +743,21 @@ func getFilteredBucket(

// Calculate the new value for numRange.
var numRange float64
if isEqualityCondition {
switch {
case isEqualityCondition:
numRange = 0
} else if ok && rangeBefore > 0 {
// If we were successful in finding the ranges before and after filtering,
// calculate the fraction of values that should be assigned to the new
// bucket.
case ok && rangeBefore > 0:
// If we were successful in finding the ranges before and after
// filtering, calculate the fraction of values that should be assigned
// to the new bucket.
numRange = b.NumRange * rangeAfter / rangeBefore
} else {
if !math.IsNaN(numRange) {
break
}
// If the new value is NaN, fallthrough to the default case to estimate
// the numRange.
fallthrough
default:
// In the absence of any information, assume we reduced the size of the
// bucket by half.
numRange = 0.5 * b.NumRange
Expand Down

0 comments on commit 82e09ed

Please sign in to comment.