From 1afac4f9b6b395bcacb8ef62b1c18b7a10af2f08 Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Thu, 2 Nov 2023 15:03:00 -0600 Subject: [PATCH 1/2] sql/stats: don't use linear regression with NaN for stats forecasting This patch adds a method to the quantile function, `invalid`, which checks for `NaN` and `+/-Inf` values within the function. These indicate a failure to derive a linear regression model for the observed histograms, and can lead to internal errors and panics if not caught. Stats forecasting now checks this method before attempting to use a quantile function to derive a histogram prediction. This patch also reverts 120132e, since `fixMalformed` is no longer expected to return an error. Fixes #113645 Fixes #113680 Release note (bug fix): Fixed a bug that could cause internal errors or panics while attempting to forecast statistics on a numeric column. --- pkg/sql/stats/BUILD.bazel | 1 - pkg/sql/stats/forecast.go | 12 +- pkg/sql/stats/quantile.go | 20 ++- pkg/sql/stats/quantile_test.go | 114 ++++++++++++++++-- .../stats/simple_linear_regression_test.go | 6 +- 5 files changed, 127 insertions(+), 26 deletions(-) diff --git a/pkg/sql/stats/BUILD.bazel b/pkg/sql/stats/BUILD.bazel index 924bd93325ad..aad2ae461d22 100644 --- a/pkg/sql/stats/BUILD.bazel +++ b/pkg/sql/stats/BUILD.bazel @@ -54,7 +54,6 @@ go_library( "//pkg/util/hlc", "//pkg/util/intsets", "//pkg/util/log", - "//pkg/util/log/logcrash", "//pkg/util/mon", "//pkg/util/protoutil", "//pkg/util/stop", diff --git a/pkg/sql/stats/forecast.go b/pkg/sql/stats/forecast.go index 35095e366398..fd6cfae44323 100644 --- a/pkg/sql/stats/forecast.go +++ b/pkg/sql/stats/forecast.go @@ -27,7 +27,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/errorutil" "github.com/cockroachdb/cockroach/pkg/util/log" - "github.com/cockroachdb/cockroach/pkg/util/log/logcrash" "github.com/cockroachdb/errors" "github.com/cockroachdb/redact" ) @@ -273,7 +272,7 @@ func forecastColumnStatistics( // histogram. NOTE: If any of the observed histograms were for inverted // indexes this will produce an incorrect histogram. if observed[0].HistogramData != nil && observed[0].HistogramData.ColumnType != nil { - hist, err := predictHistogram(ctx, sv, observed, forecastAt, minRequiredFit, nonNullRowCount) + hist, err := predictHistogram(ctx, observed, forecastAt, minRequiredFit, nonNullRowCount) if err != nil { // If we did not successfully predict a histogram then copy the latest // histogram so we can adjust it. @@ -359,7 +358,6 @@ func forecastColumnStatistics( // predictHistogram tries to predict the histogram at forecast time. func predictHistogram( ctx context.Context, - sv *settings.Values, observed []*TableStatistic, forecastAt float64, minRequiredFit float64, @@ -412,12 +410,10 @@ func predictHistogram( // Construct a linear regression model of quantile functions over time, and // use it to predict a quantile function at the given time. yₙ, r2 := quantileSimpleLinearRegression(createdAts, quantiles, forecastAt) - var err error - yₙ, err = yₙ.fixMalformed() - if err != nil { - logcrash.ReportOrPanic(ctx, sv, "unable to fix malformed stats quantile: %v", yₙ) - return histogram{}, err + if yₙ.isInvalid() { + return histogram{}, errors.Newf("predicted histogram contains overflow values") } + yₙ = yₙ.fixMalformed() log.VEventf( ctx, 3, "forecast for table %v columns %v predicted quantile %v R² %v", tableID, columnIDs, yₙ, r2, diff --git a/pkg/sql/stats/quantile.go b/pkg/sql/stats/quantile.go index fc31808ea9eb..485c0963d6c6 100644 --- a/pkg/sql/stats/quantile.go +++ b/pkg/sql/stats/quantile.go @@ -656,10 +656,12 @@ func (q quantile) integrateSquared() float64 { // This function fixes the negative slope pieces by "moving" p from the // overlapping places to where it should be. No p is lost in the making of these // calculations. -func (q quantile) fixMalformed() (quantile, error) { +// +// fixMalformed should not be called without checking that isInvalid() is false. +func (q quantile) fixMalformed() quantile { // Check for the happy case where q is already well-formed. if q.isWellFormed() { - return q, nil + return q } // To fix a malformed quantile function, we recalculate p for each distinct @@ -804,7 +806,7 @@ func (q quantile) fixMalformed() (quantile, error) { // (> v) and we always add two intersectionPs for "double roots" touching // our line from above (one endpoint > v, one endpoint = v). if len(intersectionPs) == 0 { - return q, errors.New("unable to fix malformed stats quantile") + return q } lessEqP := intersectionPs[0] for j := 1; j < len(intersectionPs); j += 2 { @@ -821,7 +823,17 @@ func (q quantile) fixMalformed() (quantile, error) { fixed = append(fixed, quantilePoint{p: lessEqP, v: val}) } } - return fixed, nil + return fixed +} + +// isInvalid returns true if q contains NaN or Inf values. +func (q quantile) isInvalid() bool { + for i := range q { + if math.IsNaN(q[i].v) || math.IsInf(q[i].v, 0) { + return true + } + } + return false } // isWellFormed returns true if q is well-formed (i.e. is non-decreasing in v). diff --git a/pkg/sql/stats/quantile_test.go b/pkg/sql/stats/quantile_test.go index a7727384085e..f1f2cd7063f2 100644 --- a/pkg/sql/stats/quantile_test.go +++ b/pkg/sql/stats/quantile_test.go @@ -1348,10 +1348,7 @@ func TestQuantileOps(t *testing.T) { if intSqMult0 != 0 { t.Errorf("test case %d incorrect intSqMult0 %v expected 0", i, intSqMult0) } - fixed, err := tc.q.fixMalformed() - if err != nil { - t.Errorf("test case %d has an unexpected error: %s", i, err) - } + fixed := tc.q.fixMalformed() if !reflect.DeepEqual(fixed, tc.fixed) { t.Errorf("test case %d incorrect fixed %v expected %v", i, fixed, tc.fixed) } @@ -1410,10 +1407,7 @@ func TestQuantileOpsRandom(t *testing.T) { if intSqMult0 != 0 { t.Errorf("seed %v quantile %v incorrect intSqMult0 %v expected 0", seed, q, intSqMult0) } - fixed, err := q.fixMalformed() - if err != nil { - t.Errorf("seed %v quantile %v had an unexpected error: %s", seed, q, err) - } + fixed := q.fixMalformed() intSqFixed := fixed.integrateSquared() // This seems like it should run into floating point errors, but it hasn't // yet, so yay? @@ -1423,3 +1417,107 @@ func TestQuantileOpsRandom(t *testing.T) { }) } } + +// TestQuantileInvalid tests the quantile.invalid() method. +func TestQuantileInvalid(t *testing.T) { + testCases := []struct { + q quantile + invalid bool + }{ + // Valid quantiles. + { + q: zeroQuantile, + }, + { + q: quantile{{0, 100}, {1, 200}}, + }, + { + q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, 400}}, + }, + { + q: quantile{{0, -811}, {0.125, -811}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}}, + }, + { + q: quantile{{0, 102}, {0.125, 102.25}, {0.25, 202.5}, {0.375, 302.75}, {0.375, 202.75}, {0.5, 103}, {0.625, 103.25}, {0.75, 3.5}, {0.875, 3.75}, {0.875, 103.75}, {0.875, 403.75}, {1, 104}}, + }, + // Invalid quantiles with NaN. + { + q: quantile{{0, 100}, {1, math.NaN()}}, + invalid: true, + }, + { + q: quantile{{0, math.NaN()}, {1, 200}}, + invalid: true, + }, + { + q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.NaN()}}, + invalid: true, + }, + { + q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.NaN()}, {1, math.NaN()}}, + invalid: true, + }, + { + q: quantile{{0, math.NaN()}, {0.125, -811}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}}, + invalid: true, + }, + { + q: quantile{{0, math.NaN()}, {0.125, math.NaN()}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}}, + invalid: true, + }, + { + q: quantile{{0, 102}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, 202.75}, {0.5, 103}, {0.625, 103.25}, {0.75, 3.5}, {0.875, 3.75}, {0.875, 103.75}, {0.875, 403.75}, {1, 104}}, + invalid: true, + }, + { + q: quantile{{0, 102}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, math.NaN()}, {0.5, 103}, {0.625, math.NaN()}, {0.75, 3.5}, {0.875, 3.75}, {0.875, math.NaN()}, {0.875, 403.75}, {1, 104}}, + invalid: true, + }, + // Invalid quantiles with Inf. + { + q: quantile{{0, 100}, {1, math.Inf(1)}}, + invalid: true, + }, + { + q: quantile{{0, math.Inf(1)}, {1, 200}}, + invalid: true, + }, + { + q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.Inf(1)}}, + invalid: true, + }, + { + q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.Inf(1)}, {1, math.Inf(1)}}, + invalid: true, + }, + { + q: quantile{{0, 100}, {1, math.Inf(-1)}}, + invalid: true, + }, + { + q: quantile{{0, math.Inf(-1)}, {1, 200}}, + invalid: true, + }, + { + q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.Inf(-1)}}, + invalid: true, + }, + { + q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.Inf(-1)}, {1, math.Inf(-1)}}, + invalid: true, + }, + // Invalid quantiles with NaN and Inf. + { + q: quantile{{0, math.Inf(-1)}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, math.Inf(1)}, {0.5, 103}, {0.625, 103.25}, {0.75, math.NaN()}, {0.875, math.Inf(1)}, {0.875, math.Inf(-1)}, {0.875, 403.75}, {1, 104}}, + invalid: true, + }, + } + for i, tc := range testCases { + t.Run(strconv.Itoa(i), func(t *testing.T) { + invalid := tc.q.isInvalid() + if invalid != tc.invalid { + t.Errorf("test case %d expected invalid to be %v, but was %v", i, tc.invalid, invalid) + } + }) + } +} diff --git a/pkg/sql/stats/simple_linear_regression_test.go b/pkg/sql/stats/simple_linear_regression_test.go index eb68db786050..d1f44a6feaef 100644 --- a/pkg/sql/stats/simple_linear_regression_test.go +++ b/pkg/sql/stats/simple_linear_regression_test.go @@ -286,11 +286,7 @@ func TestQuantileSimpleLinearRegression(t *testing.T) { for i, tc := range testCases { t.Run(strconv.Itoa(i), func(t *testing.T) { yn, r2 := quantileSimpleLinearRegression(tc.x, tc.y, tc.xn) - var err error - yn, err = yn.fixMalformed() - if err != nil { - t.Errorf("test case %d had an unexpected error: %s", i, err) - } + yn = yn.fixMalformed() if !reflect.DeepEqual(yn, tc.yn) { t.Errorf("test case %d incorrect yn %v expected %v", i, yn, tc.yn) } From 5d0e071091f0558b74061c33a4a1cc5c41c3c65a Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Fri, 3 Nov 2023 02:25:36 -0600 Subject: [PATCH 2/2] sql/stats: add panic-catching to GetTableStats This patch adds panic-catching logic to `GetTableStats`, so that "safe" panics (out-of-bounds, assertion etc.) can be propagated as a returned error. This allows some code-paths to ignore the returned error, so that execution can proceed even there's an unexpected error in the stats code. This prevents situations where it becomes impossible to query a table because of a stats bug. Informs #113645 Release note: None --- pkg/sql/stats/stats_cache.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go index 37f81290d473..9411ad15f221 100644 --- a/pkg/sql/stats/stats_cache.go +++ b/pkg/sql/stats/stats_cache.go @@ -36,6 +36,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/cache" "github.com/cockroachdb/cockroach/pkg/util/encoding" + "github.com/cockroachdb/cockroach/pkg/util/errorutil" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" @@ -214,10 +215,23 @@ func decodeTableStatisticsKV( // The statistics are ordered by their CreatedAt time (newest-to-oldest). func (sc *TableStatisticsCache) GetTableStats( ctx context.Context, table catalog.TableDescriptor, -) ([]*TableStatistic, error) { +) (stats []*TableStatistic, err error) { if !statsUsageAllowed(table, sc.settings) { return nil, nil } + defer func() { + if r := recover(); r != nil { + // In the event of a "safe" panic, we only want to log the error and + // continue executing the query without stats for this table. + if ok, e := errorutil.ShouldCatch(r); ok { + err = e + } else { + // Other panic objects can't be considered "safe" and thus are + // propagated as crashes that terminate the session. + panic(r) + } + } + }() forecast := forecastAllowed(table, sc.settings) return sc.getTableStatsFromCache(ctx, table.GetID(), &forecast) }