From 7b8ad4cf9365afef0ee7e6c3c94d64447a3b9739 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 5 Apr 2023 17:37:57 -0400 Subject: [PATCH] opt: fix histogram filtering with infinite upper bound This commit fixes a bug that caused the filtering a histogram bucket with an infinite upper bound to produce a bucket with a `numRange` of `NaN`. This led to bad query plans because the `NaN` did not reduce the input expression's row count at all. Fixes #100582 Release note (bug fix): A bug has been fixed that caused suboptimal query plans when a table contained infinite values, like `'+Infinity'::DECIMAL`. This bug is present since versions v22.1, and likely earlier. It could also be triggered in rare cases when table statistics forecasts created a forecasted bucket with an infinite value. --- pkg/sql/opt/memo/testdata/stats/select | 66 ++++++++++++++++++++++++++ pkg/sql/opt/props/histogram.go | 19 +++++--- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/pkg/sql/opt/memo/testdata/stats/select b/pkg/sql/opt/memo/testdata/stats/select index f8a348474939..fd54f92d9453 100644 --- a/pkg/sql/opt/memo/testdata/stats/select +++ b/pkg/sql/opt/memo/testdata/stats/select @@ -3580,6 +3580,72 @@ project │ └── b:3 = 1 [type=bool, outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)] └── filters (true) +# Regression test for #100582. A bucket with an infinite upper bound should not +# create NaNs in filtered histograms. +exec-ddl +CREATE TABLE t100582 ( + id INT PRIMARY KEY, + s STRING NULL, + d DECIMAL NULL +); +---- + +exec-ddl +ALTER TABLE t100582 INJECT STATISTICS '[ + { + "columns": ["d"], + "row_count": 3, + "distinct_count": 3, + "null_count": 0, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 0, + "num_range": 0, + "upper_bound": "0" + }, + { + "distinct_range": 0, + "num_eq": 1, + "num_range": 0, + "upper_bound": "1" + }, + { + "distinct_range": 1, + "num_eq": 0, + "num_range": 1, + "upper_bound": "Infinity" + } + ], + "histo_col_type": "DECIMAL", + "created_at": "2023-04-04 07:47:43.132805", + "histo_version": 2 + } +]' +---- + +# The filtered histogram should not have a numRange of NaN. +norm +SELECT s FROM t100582 WHERE d >= 2; +---- +project + ├── columns: s:2(string) + ├── immutable + ├── stats: [rows=0.75] + └── select + ├── columns: s:2(string) d:3(decimal!null) + ├── immutable + ├── stats: [rows=0.75, distinct(3)=0.5, null(3)=0] + │ histogram(3)= 0 0 0.5 0 + │ <--- 2 ----- Infinity + ├── scan t100582 + │ ├── columns: s:2(string) d:3(decimal) + │ └── stats: [rows=3, distinct(3)=3, null(3)=0] + │ histogram(3)= 0 0 0 1 1 0 + │ <--- 0 --- 1 --- Infinity + └── filters + └── d:3 >= 2 [type=bool, outer=(3), immutable, constraints=(/3: [/2 - ]; tight)] + # Tests for selectivity of disjunctions exec-ddl diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index 24c424cf3e32..86bb42f4b6cd 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -743,14 +743,21 @@ func getFilteredBucket( // Calculate the new value for numRange. var numRange float64 - if isEqualityCondition { + switch { + case isEqualityCondition: numRange = 0 - } else if ok && rangeBefore > 0 { - // If we were successful in finding the ranges before and after filtering, - // calculate the fraction of values that should be assigned to the new - // bucket. + case ok && rangeBefore > 0: + // If we were successful in finding the ranges before and after + // filtering, calculate the fraction of values that should be assigned + // to the new bucket. numRange = b.NumRange * rangeAfter / rangeBefore - } else { + if !math.IsNaN(numRange) { + break + } + // If the new value is NaN, fallthrough to the default case to estimate + // the numRange. + fallthrough + default: // In the absence of any information, assume we reduced the size of the // bucket by half. numRange = 0.5 * b.NumRange