From 1afac4f9b6b395bcacb8ef62b1c18b7a10af2f08 Mon Sep 17 00:00:00 2001
From: Drew Kimball <drewk@cockroachlabs.com>
Date: Thu, 2 Nov 2023 15:03:00 -0600
Subject: [PATCH] sql/stats: don't use linear regression with NaN for stats
 forecasting

This patch adds a method to the quantile function, `invalid`, which checks
for `NaN` and `+/-Inf` values within the function. These indicate a failure
to derive a linear regression model for the observed histograms, and can lead
to internal errors and panics if not caught. Stats forecasting now checks this
method before attempting to use a quantile function to derive a histogram
prediction. This patch also reverts 120132e, since `fixMalformed` is no longer
expected to return an error.

Fixes #113645
Fixes #113680

Release note (bug fix): Fixed a bug that could cause internal errors or
panics while attempting to forecast statistics on a numeric column.
---
 pkg/sql/stats/BUILD.bazel                     |   1 -
 pkg/sql/stats/forecast.go                     |  12 +-
 pkg/sql/stats/quantile.go                     |  20 ++-
 pkg/sql/stats/quantile_test.go                | 114 ++++++++++++++++--
 .../stats/simple_linear_regression_test.go    |   6 +-
 5 files changed, 127 insertions(+), 26 deletions(-)

diff --git a/pkg/sql/stats/BUILD.bazel b/pkg/sql/stats/BUILD.bazel
index 924bd93325ad..aad2ae461d22 100644
--- a/pkg/sql/stats/BUILD.bazel
+++ b/pkg/sql/stats/BUILD.bazel
@@ -54,7 +54,6 @@ go_library(
         "//pkg/util/hlc",
         "//pkg/util/intsets",
         "//pkg/util/log",
-        "//pkg/util/log/logcrash",
         "//pkg/util/mon",
         "//pkg/util/protoutil",
         "//pkg/util/stop",
diff --git a/pkg/sql/stats/forecast.go b/pkg/sql/stats/forecast.go
index 35095e366398..fd6cfae44323 100644
--- a/pkg/sql/stats/forecast.go
+++ b/pkg/sql/stats/forecast.go
@@ -27,7 +27,6 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
 	"github.com/cockroachdb/cockroach/pkg/util/errorutil"
 	"github.com/cockroachdb/cockroach/pkg/util/log"
-	"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
 	"github.com/cockroachdb/errors"
 	"github.com/cockroachdb/redact"
 )
@@ -273,7 +272,7 @@ func forecastColumnStatistics(
 	// histogram. NOTE: If any of the observed histograms were for inverted
 	// indexes this will produce an incorrect histogram.
 	if observed[0].HistogramData != nil && observed[0].HistogramData.ColumnType != nil {
-		hist, err := predictHistogram(ctx, sv, observed, forecastAt, minRequiredFit, nonNullRowCount)
+		hist, err := predictHistogram(ctx, observed, forecastAt, minRequiredFit, nonNullRowCount)
 		if err != nil {
 			// If we did not successfully predict a histogram then copy the latest
 			// histogram so we can adjust it.
@@ -359,7 +358,6 @@ func forecastColumnStatistics(
 // predictHistogram tries to predict the histogram at forecast time.
 func predictHistogram(
 	ctx context.Context,
-	sv *settings.Values,
 	observed []*TableStatistic,
 	forecastAt float64,
 	minRequiredFit float64,
@@ -412,12 +410,10 @@ func predictHistogram(
 	// Construct a linear regression model of quantile functions over time, and
 	// use it to predict a quantile function at the given time.
 	yₙ, r2 := quantileSimpleLinearRegression(createdAts, quantiles, forecastAt)
-	var err error
-	yₙ, err = yₙ.fixMalformed()
-	if err != nil {
-		logcrash.ReportOrPanic(ctx, sv, "unable to fix malformed stats quantile: %v", yₙ)
-		return histogram{}, err
+	if yₙ.isInvalid() {
+		return histogram{}, errors.Newf("predicted histogram contains overflow values")
 	}
+	yₙ = yₙ.fixMalformed()
 	log.VEventf(
 		ctx, 3, "forecast for table %v columns %v predicted quantile %v R² %v",
 		tableID, columnIDs, yₙ, r2,
diff --git a/pkg/sql/stats/quantile.go b/pkg/sql/stats/quantile.go
index fc31808ea9eb..485c0963d6c6 100644
--- a/pkg/sql/stats/quantile.go
+++ b/pkg/sql/stats/quantile.go
@@ -656,10 +656,12 @@ func (q quantile) integrateSquared() float64 {
 // This function fixes the negative slope pieces by "moving" p from the
 // overlapping places to where it should be. No p is lost in the making of these
 // calculations.
-func (q quantile) fixMalformed() (quantile, error) {
+//
+// fixMalformed should not be called without checking that isInvalid() is false.
+func (q quantile) fixMalformed() quantile {
 	// Check for the happy case where q is already well-formed.
 	if q.isWellFormed() {
-		return q, nil
+		return q
 	}
 
 	// To fix a malformed quantile function, we recalculate p for each distinct
@@ -804,7 +806,7 @@ func (q quantile) fixMalformed() (quantile, error) {
 		// (> v) and we always add two intersectionPs for "double roots" touching
 		// our line from above (one endpoint > v, one endpoint = v).
 		if len(intersectionPs) == 0 {
-			return q, errors.New("unable to fix malformed stats quantile")
+			return q
 		}
 		lessEqP := intersectionPs[0]
 		for j := 1; j < len(intersectionPs); j += 2 {
@@ -821,7 +823,17 @@ func (q quantile) fixMalformed() (quantile, error) {
 			fixed = append(fixed, quantilePoint{p: lessEqP, v: val})
 		}
 	}
-	return fixed, nil
+	return fixed
+}
+
+// isInvalid returns true if q contains NaN or Inf values.
+func (q quantile) isInvalid() bool {
+	for i := range q {
+		if math.IsNaN(q[i].v) || math.IsInf(q[i].v, 0) {
+			return true
+		}
+	}
+	return false
 }
 
 // isWellFormed returns true if q is well-formed (i.e. is non-decreasing in v).
diff --git a/pkg/sql/stats/quantile_test.go b/pkg/sql/stats/quantile_test.go
index a7727384085e..f1f2cd7063f2 100644
--- a/pkg/sql/stats/quantile_test.go
+++ b/pkg/sql/stats/quantile_test.go
@@ -1348,10 +1348,7 @@ func TestQuantileOps(t *testing.T) {
 			if intSqMult0 != 0 {
 				t.Errorf("test case %d incorrect intSqMult0 %v expected 0", i, intSqMult0)
 			}
-			fixed, err := tc.q.fixMalformed()
-			if err != nil {
-				t.Errorf("test case %d has an unexpected error: %s", i, err)
-			}
+			fixed := tc.q.fixMalformed()
 			if !reflect.DeepEqual(fixed, tc.fixed) {
 				t.Errorf("test case %d incorrect fixed %v expected %v", i, fixed, tc.fixed)
 			}
@@ -1410,10 +1407,7 @@ func TestQuantileOpsRandom(t *testing.T) {
 			if intSqMult0 != 0 {
 				t.Errorf("seed %v quantile %v incorrect intSqMult0 %v expected 0", seed, q, intSqMult0)
 			}
-			fixed, err := q.fixMalformed()
-			if err != nil {
-				t.Errorf("seed %v quantile %v had an unexpected error: %s", seed, q, err)
-			}
+			fixed := q.fixMalformed()
 			intSqFixed := fixed.integrateSquared()
 			// This seems like it should run into floating point errors, but it hasn't
 			// yet, so yay?
@@ -1423,3 +1417,107 @@ func TestQuantileOpsRandom(t *testing.T) {
 		})
 	}
 }
+
+// TestQuantileInvalid tests the quantile.invalid() method.
+func TestQuantileInvalid(t *testing.T) {
+	testCases := []struct {
+		q       quantile
+		invalid bool
+	}{
+		// Valid quantiles.
+		{
+			q: zeroQuantile,
+		},
+		{
+			q: quantile{{0, 100}, {1, 200}},
+		},
+		{
+			q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, 400}},
+		},
+		{
+			q: quantile{{0, -811}, {0.125, -811}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
+		},
+		{
+			q: quantile{{0, 102}, {0.125, 102.25}, {0.25, 202.5}, {0.375, 302.75}, {0.375, 202.75}, {0.5, 103}, {0.625, 103.25}, {0.75, 3.5}, {0.875, 3.75}, {0.875, 103.75}, {0.875, 403.75}, {1, 104}},
+		},
+		// Invalid quantiles with NaN.
+		{
+			q:       quantile{{0, 100}, {1, math.NaN()}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, math.NaN()}, {1, 200}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.NaN()}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.NaN()}, {1, math.NaN()}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, math.NaN()}, {0.125, -811}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, math.NaN()}, {0.125, math.NaN()}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 102}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, 202.75}, {0.5, 103}, {0.625, 103.25}, {0.75, 3.5}, {0.875, 3.75}, {0.875, 103.75}, {0.875, 403.75}, {1, 104}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 102}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, math.NaN()}, {0.5, 103}, {0.625, math.NaN()}, {0.75, 3.5}, {0.875, 3.75}, {0.875, math.NaN()}, {0.875, 403.75}, {1, 104}},
+			invalid: true,
+		},
+		// Invalid quantiles with Inf.
+		{
+			q:       quantile{{0, 100}, {1, math.Inf(1)}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, math.Inf(1)}, {1, 200}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.Inf(1)}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.Inf(1)}, {1, math.Inf(1)}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 100}, {1, math.Inf(-1)}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, math.Inf(-1)}, {1, 200}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.Inf(-1)}},
+			invalid: true,
+		},
+		{
+			q:       quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.Inf(-1)}, {1, math.Inf(-1)}},
+			invalid: true,
+		},
+		// Invalid quantiles with NaN and Inf.
+		{
+			q:       quantile{{0, math.Inf(-1)}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, math.Inf(1)}, {0.5, 103}, {0.625, 103.25}, {0.75, math.NaN()}, {0.875, math.Inf(1)}, {0.875, math.Inf(-1)}, {0.875, 403.75}, {1, 104}},
+			invalid: true,
+		},
+	}
+	for i, tc := range testCases {
+		t.Run(strconv.Itoa(i), func(t *testing.T) {
+			invalid := tc.q.isInvalid()
+			if invalid != tc.invalid {
+				t.Errorf("test case %d expected invalid to be %v, but was %v", i, tc.invalid, invalid)
+			}
+		})
+	}
+}
diff --git a/pkg/sql/stats/simple_linear_regression_test.go b/pkg/sql/stats/simple_linear_regression_test.go
index eb68db786050..d1f44a6feaef 100644
--- a/pkg/sql/stats/simple_linear_regression_test.go
+++ b/pkg/sql/stats/simple_linear_regression_test.go
@@ -286,11 +286,7 @@ func TestQuantileSimpleLinearRegression(t *testing.T) {
 	for i, tc := range testCases {
 		t.Run(strconv.Itoa(i), func(t *testing.T) {
 			yn, r2 := quantileSimpleLinearRegression(tc.x, tc.y, tc.xn)
-			var err error
-			yn, err = yn.fixMalformed()
-			if err != nil {
-				t.Errorf("test case %d had an unexpected error: %s", i, err)
-			}
+			yn = yn.fixMalformed()
 			if !reflect.DeepEqual(yn, tc.yn) {
 				t.Errorf("test case %d incorrect yn %v expected %v", i, yn, tc.yn)
 			}