Merge #100763

100763: opt: fix histogram filtering with infinite upper bound r=mgartner a=mgartner This commit fixes a bug that caused the filtering a histogram bucket with an infinite upper bound to produce a bucket with a `numRange` of `NaN`. This led to bad query plans because the `NaN` did not reduce the input expression's row count at all. Fixes #100582 Release note (bug fix): A bug has been fixed that caused suboptimal query plans when a table contained infinite values, like `'+Infinity'::DECIMAL`. This bug is present since versions v22.1, and likely earlier. It could also be triggered in rare cases when table statistics forecasts created a forecasted bucket with an infinite value. Co-authored-by: Marcus Gartner <[email protected]>
cockroachdb · Apr 10, 2023 · 82e09ed · 82e09ed
2 parents 9ce0ac2 + 7b8ad4c
commit 82e09ed
Show file tree

Hide file tree

Showing 2 changed files with 79 additions and 6 deletions.
diff --git a/pkg/sql/opt/memo/testdata/stats/select b/pkg/sql/opt/memo/testdata/stats/select
@@ -3580,6 +3580,72 @@ project
       │         └── b:3 = 1 [type=bool, outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)]
       └── filters (true)
 
+# Regression test for #100582. A bucket with an infinite upper bound should not
+# create NaNs in filtered histograms.
+exec-ddl
+CREATE TABLE t100582 (
+  id INT PRIMARY KEY,
+  s STRING NULL,
+  d DECIMAL NULL
+);
+----
+
+exec-ddl
+ALTER TABLE t100582 INJECT STATISTICS '[
+  {
+    "columns": ["d"],
+    "row_count": 3,
+    "distinct_count": 3,
+    "null_count": 0,
+    "histo_buckets": [
+      {
+        "distinct_range": 0,
+        "num_eq": 0,
+        "num_range": 0,
+        "upper_bound": "0"
+      },
+      {
+        "distinct_range": 0,
+        "num_eq": 1,
+        "num_range": 0,
+        "upper_bound": "1"
+      },
+      {
+        "distinct_range": 1,
+        "num_eq": 0,
+        "num_range": 1,
+        "upper_bound": "Infinity"
+      }
+    ],
+    "histo_col_type": "DECIMAL",
+    "created_at": "2023-04-04 07:47:43.132805",
+    "histo_version": 2
+  }
+]'
+----
+
+# The filtered histogram should not have a numRange of NaN.
+norm
+SELECT s FROM t100582 WHERE d >= 2;
+----
+project
+ ├── columns: s:2(string)
+ ├── immutable
+ ├── stats: [rows=0.75]
+ └── select
+      ├── columns: s:2(string) d:3(decimal!null)
+      ├── immutable
+      ├── stats: [rows=0.75, distinct(3)=0.5, null(3)=0]
+      │   histogram(3)=  0  0  0.5     0
+      │                <--- 2 ----- Infinity
+      ├── scan t100582
+      │    ├── columns: s:2(string) d:3(decimal)
+      │    └── stats: [rows=3, distinct(3)=3, null(3)=0]
+      │        histogram(3)=  0  0  0  1  1     0
+      │                     <--- 0 --- 1 --- Infinity
+      └── filters
+           └── d:3 >= 2 [type=bool, outer=(3), immutable, constraints=(/3: [/2 - ]; tight)]
+
 # Tests for selectivity of disjunctions
 
 exec-ddl

diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go
@@ -743,14 +743,21 @@ func getFilteredBucket(
 
 	// Calculate the new value for numRange.
 	var numRange float64
-	if isEqualityCondition {
+	switch {
+	case isEqualityCondition:
 		numRange = 0
-	} else if ok && rangeBefore > 0 {
-		// If we were successful in finding the ranges before and after filtering,
-		// calculate the fraction of values that should be assigned to the new
-		// bucket.
+	case ok && rangeBefore > 0:
+		// If we were successful in finding the ranges before and after
+		// filtering, calculate the fraction of values that should be assigned
+		// to the new bucket.
 		numRange = b.NumRange * rangeAfter / rangeBefore
-	} else {
+		if !math.IsNaN(numRange) {
+			break
+		}
+		// If the new value is NaN, fallthrough to the default case to estimate
+		// the numRange.
+		fallthrough
+	default:
 		// In the absence of any information, assume we reduced the size of the
 		// bucket by half.
 		numRange = 0.5 * b.NumRange