From 98d1e0c74770a6fa6c932877ec9177bf0e58019a Mon Sep 17 00:00:00 2001
From: Michael Erickson <michae2@cockroachlabs.com>
Date: Wed, 17 Aug 2022 16:11:04 -0700
Subject: [PATCH] sql/stats: remove NumRange-stealing behavior from histogram
 prediction

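When converting a quantile function to a histogram, toHistogram would take
rows from NumRange to raise NumEq to 1 whenever NumEq was below 1 and the
bucket held at least 2 rows in total, so that the result looked more like
something EquiDepthHistogram would produce. We should be able to handle
NumEq=0 just fine everywhere that uses histograms, so delete this
NumRange-stealing code.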

Fixes: #86344

Release justification: low-risk updates to new functionality.

Release note: None
---
 .../opt/exec/execbuilder/testdata/forecast    | 139 +++++++++++++++++-
 pkg/sql/stats/forecast_test.go                |  10 +-
 pkg/sql/stats/quantile.go                     |   9 --
 pkg/sql/stats/quantile_test.go                |  16 +-
 4 files changed, 151 insertions(+), 23 deletions(-)

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/forecast b/pkg/sql/opt/exec/execbuilder/testdata/forecast
index 17d009ac4d28..8d5d5b71efbe 100644
--- a/pkg/sql/opt/exec/execbuilder/testdata/forecast
+++ b/pkg/sql/opt/exec/execbuilder/testdata/forecast
@@ -592,8 +592,145 @@ scan c
  ├── columns: h:1
  ├── constraint: /1: [/'1988-08-07 00:00:00+00:00' - ]
  ├── stats: [rows=24, distinct(1)=24, null(1)=0, avgsize(1)=7]
- │   histogram(1)=  0               1               5               1               5               1               5               1               4               1
+ │   histogram(1)=  0               1               5               1               5               1               5               1               5               0
  │                <--- '1988-08-07 00:00:00+00:00' --- '1988-08-07 06:00:00+00:00' --- '1988-08-07 12:00:00+00:00' --- '1988-08-07 18:00:00+00:00' --- '1988-08-08 00:00:00+00:00'
  ├── cost: 39.7
  ├── key: (1)
  └── distribution: test
+
+# Test for issue 86344: forecast from histograms whose buckets all have num_eq = 0.
+
+statement ok
+CREATE TABLE x (a INT PRIMARY KEY) WITH (sql_stats_automatic_collection_enabled = false);
+
+statement ok
+ALTER TABLE x INJECT STATISTICS '[
+      {
+          "avg_size": 1,
+          "columns": [
+              "a"
+          ],
+          "created_at": "2020-03-13 00:00:00.000000",
+          "distinct_count": 4,
+          "histo_buckets": [
+              {
+                  "distinct_range": 0,
+                  "num_eq": 0,
+                  "num_range": 0,
+                  "upper_bound": "4"
+              },
+              {
+                  "distinct_range": 2,
+                  "num_eq": 0,
+                  "num_range": 2,
+                  "upper_bound": "7"
+              },
+              {
+                  "distinct_range": 2,
+                  "num_eq": 0,
+                  "num_range": 2,
+                  "upper_bound": "10"
+              }
+          ],
+          "histo_col_type": "INT8",
+          "histo_version": 2,
+          "name": "__auto__",
+          "null_count": 0,
+          "row_count": 4
+      },
+      {
+          "avg_size": 1,
+          "columns": [
+              "a"
+          ],
+          "created_at": "2020-03-14 00:00:00.000000",
+          "distinct_count": 4,
+          "histo_buckets": [
+              {
+                  "distinct_range": 0,
+                  "num_eq": 0,
+                  "num_range": 0,
+                  "upper_bound": "7"
+              },
+              {
+                  "distinct_range": 2,
+                  "num_eq": 0,
+                  "num_range": 2,
+                  "upper_bound": "10"
+              },
+              {
+                  "distinct_range": 2,
+                  "num_eq": 0,
+                  "num_range": 2,
+                  "upper_bound": "13"
+              }
+          ],
+          "histo_col_type": "INT8",
+          "histo_version": 2,
+          "name": "__auto__",
+          "null_count": 0,
+          "row_count": 4
+      },
+      {
+          "avg_size": 1,
+          "columns": [
+              "a"
+          ],
+          "created_at": "2020-03-15 00:00:00.000000",
+          "distinct_count": 4,
+          "histo_buckets": [
+              {
+                  "distinct_range": 0,
+                  "num_eq": 0,
+                  "num_range": 0,
+                  "upper_bound": "10"
+              },
+              {
+                  "distinct_range": 2,
+                  "num_eq": 0,
+                  "num_range": 2,
+                  "upper_bound": "13"
+              },
+              {
+                  "distinct_range": 2,
+                  "num_eq": 0,
+                  "num_range": 2,
+                  "upper_bound": "16"
+              }
+          ],
+          "histo_col_type": "INT8",
+          "histo_version": 2,
+          "name": "__auto__",
+          "null_count": 0,
+          "row_count": 4
+      }
+]';
+
+query T
+SELECT jsonb_pretty(stat->'histo_buckets')
+FROM (
+  SELECT jsonb_array_elements(statistics) AS stat
+  FROM [SHOW STATISTICS USING JSON FOR TABLE x WITH FORECAST]
+)
+WHERE stat->>'name' = '__forecast__';
+----
+[
+    {
+        "distinct_range": 0,
+        "num_eq": 0,
+        "num_range": 0,
+        "upper_bound": "13"
+    },
+    {
+        "distinct_range": 2,
+        "num_eq": 0,
+        "num_range": 2,
+        "upper_bound": "16"
+    },
+    {
+        "distinct_range": 2,
+        "num_eq": 0,
+        "num_range": 2,
+        "upper_bound": "19"
+    }
+]
diff --git a/pkg/sql/stats/forecast_test.go b/pkg/sql/stats/forecast_test.go
index 65d384711964..271461e6b80c 100644
--- a/pkg/sql/stats/forecast_test.go
+++ b/pkg/sql/stats/forecast_test.go
@@ -490,22 +490,22 @@ func TestForecastColumnStatistics(t *testing.T) {
 					hist: testHistogram{},
 				},
 				{
-					at: 2, row: 5, dist: 2, null: 3, size: 2,
+					at: 2, row: 5, dist: 3, null: 3, size: 2,
 					hist: testHistogram{{1, 0, 0, 200}, {0, 1, 1, 800}},
 				},
 				{
-					at: 3, row: 7, dist: 3, null: 3, size: 2,
+					at: 3, row: 7, dist: 4, null: 3, size: 2,
 					hist: testHistogram{{2, 0, 0, 200}, {0, 2, 2, 800}},
 				},
 				{
-					at: 4, row: 9, dist: 4, null: 3, size: 2,
+					at: 4, row: 9, dist: 5, null: 3, size: 2,
 					hist: testHistogram{{3, 0, 0, 200}, {0, 3, 3, 800}},
 				},
 			},
 			at: 5,
 			forecast: &testStat{
-				at: 5, row: 11, dist: 5, null: 3, size: 2,
-				hist: testHistogram{{4, 0, 0, 200}, {1, 3, 2, 800}},
+				at: 5, row: 11, dist: 6, null: 3, size: 2,
+				hist: testHistogram{{4, 0, 0, 200}, {0, 4, 4, 800}},
 			},
 		},
 		// Histogram, constant numbers but changing shape
diff --git a/pkg/sql/stats/quantile.go b/pkg/sql/stats/quantile.go
index 2e69d6cd154c..e8f68d5e8a66 100644
--- a/pkg/sql/stats/quantile.go
+++ b/pkg/sql/stats/quantile.go
@@ -282,15 +282,6 @@ func (q quantile) toHistogram(colType *types.T, rowCount float64) (histogram, er
 		if !isValidCount(numEq) {
 			return errors.AssertionFailedf("invalid histogram NumEq: %v", numEq)
 		}
-		if numEq < 1 && currentBucket.NumRange+numEq >= 2 {
-			// Steal from NumRange so that NumEq is at least 1, if it wouldn't make
-			// NumRange 0. This makes the histogram look more like something
-			// EquiDepthHistogram would produce.
-			// TODO(michae2): Consider removing this logic if statistics_builder
-			// doesn't need it.
-			currentBucket.NumRange -= 1 - numEq
-			numEq = 1
-		}
 		currentBucket.NumEq = numEq
 
 		// Calculate DistinctRange for this bucket now that NumRange is finalized.
diff --git a/pkg/sql/stats/quantile_test.go b/pkg/sql/stats/quantile_test.go
index 48381cd1898f..320d5b3dc4ae 100644
--- a/pkg/sql/stats/quantile_test.go
+++ b/pkg/sql/stats/quantile_test.go
@@ -479,9 +479,9 @@ func TestQuantileToHistogram(t *testing.T) {
 			hist: testHistogram{{0, 0, 0, 0}, {1, 1, 1, 100}},
 		},
 		{
-			qfun: quantile{{0, 0}, {0.9, 100}, {1, 100}},
-			rows: 10,
-			hist: testHistogram{{0, 0, 0, 0}, {1, 9, 9, 100}},
+			qfun: quantile{{0, 0}, {0.9375, 100}, {1, 100}},
+			rows: 16,
+			hist: testHistogram{{0, 0, 0, 0}, {1, 15, 15, 100}},
 		},
 		{
 			qfun: quantile{{0, 100}, {0.25, 100}, {0.75, 200}, {1, 200}},
@@ -503,26 +503,26 @@ func TestQuantileToHistogram(t *testing.T) {
 			rows: 32,
 			hist: testHistogram{{4, 0, 0, 310}, {4, 0, 0, 320}, {8, 0, 0, 330}, {4, 0, 0, 340}, {4, 0, 0, 350}, {4, 0, 0, 360}, {4, 0, 0, 370}},
 		},
-		// Cases where we steal a row from NumRange to give to NumEq.
+		// Cases with 0 NumEq.
 		{
 			qfun: quantile{{0, 0}, {1, 100}},
 			rows: 2,
-			hist: testHistogram{{0, 0, 0, 0}, {1, 1, 1, 100}},
+			hist: testHistogram{{0, 0, 0, 0}, {0, 2, 2, 100}},
 		},
 		{
 			qfun: quantile{{0, 100}, {0.5, 100}, {1, 200}, {1, 300}},
 			rows: 4,
-			hist: testHistogram{{2, 0, 0, 100}, {1, 1, 1, 200}},
+			hist: testHistogram{{2, 0, 0, 100}, {0, 2, 2, 200}},
 		},
 		{
 			qfun: quantile{{0, 0}, {0.875, 87.5}, {1, 100}},
 			rows: 8,
-			hist: testHistogram{{0, 0, 0, 0}, {1, 6, 6, 87.5}, {0, 1, 1, 100}},
+			hist: testHistogram{{0, 0, 0, 0}, {0, 7, 7, 87.5}, {0, 1, 1, 100}},
 		},
 		{
 			qfun: quantile{{0, 400}, {0.5, 600}, {0.75, 700}, {1, 800}},
 			rows: 16,
-			hist: testHistogram{{0, 0, 0, 400}, {1, 7, 7, 600}, {1, 3, 3, 700}, {1, 3, 3, 800}},
+			hist: testHistogram{{0, 0, 0, 400}, {0, 8, 8, 600}, {0, 4, 4, 700}, {0, 4, 4, 800}},
 		},
 		// Error cases.
 		{