Skip to content

Commit

Permalink
sql/stats: don't use linear regression with NaN for stats forecasting
Browse files Browse the repository at this point in the history
This patch adds a method to the quantile function, `invalid`, which checks
for `NaN` and `+/-Inf` values within the function. These indicate a failure
to derive a linear regression model for the observed histograms, and can lead
to internal errors and panics if not caught. Stats forecasting now checks this
method before attempting to use a quantile function to derive a histogram
prediction. This patch also reverts 120132e, since `fixMalformed` is no longer
expected to return an error.

Fixes #113645
Fixes #113680

Release note (bug fix): Fixed a bug that could cause internal errors or
panics while attempting to forecast statistics on a numeric column.
  • Loading branch information
DrewKimball committed Nov 6, 2023
1 parent 80415cb commit 1afac4f
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 26 deletions.
1 change: 0 additions & 1 deletion pkg/sql/stats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ go_library(
"//pkg/util/hlc",
"//pkg/util/intsets",
"//pkg/util/log",
"//pkg/util/log/logcrash",
"//pkg/util/mon",
"//pkg/util/protoutil",
"//pkg/util/stop",
Expand Down
12 changes: 4 additions & 8 deletions pkg/sql/stats/forecast.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
)
Expand Down Expand Up @@ -273,7 +272,7 @@ func forecastColumnStatistics(
// histogram. NOTE: If any of the observed histograms were for inverted
// indexes this will produce an incorrect histogram.
if observed[0].HistogramData != nil && observed[0].HistogramData.ColumnType != nil {
hist, err := predictHistogram(ctx, sv, observed, forecastAt, minRequiredFit, nonNullRowCount)
hist, err := predictHistogram(ctx, observed, forecastAt, minRequiredFit, nonNullRowCount)
if err != nil {
// If we did not successfully predict a histogram then copy the latest
// histogram so we can adjust it.
Expand Down Expand Up @@ -359,7 +358,6 @@ func forecastColumnStatistics(
// predictHistogram tries to predict the histogram at forecast time.
func predictHistogram(
ctx context.Context,
sv *settings.Values,
observed []*TableStatistic,
forecastAt float64,
minRequiredFit float64,
Expand Down Expand Up @@ -412,12 +410,10 @@ func predictHistogram(
// Construct a linear regression model of quantile functions over time, and
// use it to predict a quantile function at the given time.
yₙ, r2 := quantileSimpleLinearRegression(createdAts, quantiles, forecastAt)
var err error
yₙ, err = yₙ.fixMalformed()
if err != nil {
logcrash.ReportOrPanic(ctx, sv, "unable to fix malformed stats quantile: %v", yₙ)
return histogram{}, err
if yₙ.isInvalid() {
return histogram{}, errors.Newf("predicted histogram contains overflow values")
}
yₙ = yₙ.fixMalformed()
log.VEventf(
ctx, 3, "forecast for table %v columns %v predicted quantile %v R² %v",
tableID, columnIDs, yₙ, r2,
Expand Down
20 changes: 16 additions & 4 deletions pkg/sql/stats/quantile.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,10 +656,12 @@ func (q quantile) integrateSquared() float64 {
// This function fixes the negative slope pieces by "moving" p from the
// overlapping places to where it should be. No p is lost in the making of these
// calculations.
func (q quantile) fixMalformed() (quantile, error) {
//
// fixMalformed should not be called without checking that isInvalid() is false.
func (q quantile) fixMalformed() quantile {
// Check for the happy case where q is already well-formed.
if q.isWellFormed() {
return q, nil
return q
}

// To fix a malformed quantile function, we recalculate p for each distinct
Expand Down Expand Up @@ -804,7 +806,7 @@ func (q quantile) fixMalformed() (quantile, error) {
// (> v) and we always add two intersectionPs for "double roots" touching
// our line from above (one endpoint > v, one endpoint = v).
if len(intersectionPs) == 0 {
return q, errors.New("unable to fix malformed stats quantile")
return q
}
lessEqP := intersectionPs[0]
for j := 1; j < len(intersectionPs); j += 2 {
Expand All @@ -821,7 +823,17 @@ func (q quantile) fixMalformed() (quantile, error) {
fixed = append(fixed, quantilePoint{p: lessEqP, v: val})
}
}
return fixed, nil
return fixed
}

// isInvalid returns true if q contains NaN or Inf values.
func (q quantile) isInvalid() bool {
for i := range q {
if math.IsNaN(q[i].v) || math.IsInf(q[i].v, 0) {
return true
}
}
return false
}

// isWellFormed returns true if q is well-formed (i.e. is non-decreasing in v).
Expand Down
114 changes: 106 additions & 8 deletions pkg/sql/stats/quantile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1348,10 +1348,7 @@ func TestQuantileOps(t *testing.T) {
if intSqMult0 != 0 {
t.Errorf("test case %d incorrect intSqMult0 %v expected 0", i, intSqMult0)
}
fixed, err := tc.q.fixMalformed()
if err != nil {
t.Errorf("test case %d has an unexpected error: %s", i, err)
}
fixed := tc.q.fixMalformed()
if !reflect.DeepEqual(fixed, tc.fixed) {
t.Errorf("test case %d incorrect fixed %v expected %v", i, fixed, tc.fixed)
}
Expand Down Expand Up @@ -1410,10 +1407,7 @@ func TestQuantileOpsRandom(t *testing.T) {
if intSqMult0 != 0 {
t.Errorf("seed %v quantile %v incorrect intSqMult0 %v expected 0", seed, q, intSqMult0)
}
fixed, err := q.fixMalformed()
if err != nil {
t.Errorf("seed %v quantile %v had an unexpected error: %s", seed, q, err)
}
fixed := q.fixMalformed()
intSqFixed := fixed.integrateSquared()
// This seems like it should run into floating point errors, but it hasn't
// yet, so yay?
Expand All @@ -1423,3 +1417,107 @@ func TestQuantileOpsRandom(t *testing.T) {
})
}
}

// TestQuantileInvalid tests the quantile.invalid() method.
func TestQuantileInvalid(t *testing.T) {
testCases := []struct {
q quantile
invalid bool
}{
// Valid quantiles.
{
q: zeroQuantile,
},
{
q: quantile{{0, 100}, {1, 200}},
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, 400}},
},
{
q: quantile{{0, -811}, {0.125, -811}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
},
{
q: quantile{{0, 102}, {0.125, 102.25}, {0.25, 202.5}, {0.375, 302.75}, {0.375, 202.75}, {0.5, 103}, {0.625, 103.25}, {0.75, 3.5}, {0.875, 3.75}, {0.875, 103.75}, {0.875, 403.75}, {1, 104}},
},
// Invalid quantiles with NaN.
{
q: quantile{{0, 100}, {1, math.NaN()}},
invalid: true,
},
{
q: quantile{{0, math.NaN()}, {1, 200}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.NaN()}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.NaN()}, {1, math.NaN()}},
invalid: true,
},
{
q: quantile{{0, math.NaN()}, {0.125, -811}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
invalid: true,
},
{
q: quantile{{0, math.NaN()}, {0.125, math.NaN()}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
invalid: true,
},
{
q: quantile{{0, 102}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, 202.75}, {0.5, 103}, {0.625, 103.25}, {0.75, 3.5}, {0.875, 3.75}, {0.875, 103.75}, {0.875, 403.75}, {1, 104}},
invalid: true,
},
{
q: quantile{{0, 102}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, math.NaN()}, {0.5, 103}, {0.625, math.NaN()}, {0.75, 3.5}, {0.875, 3.75}, {0.875, math.NaN()}, {0.875, 403.75}, {1, 104}},
invalid: true,
},
// Invalid quantiles with Inf.
{
q: quantile{{0, 100}, {1, math.Inf(1)}},
invalid: true,
},
{
q: quantile{{0, math.Inf(1)}, {1, 200}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.Inf(1)}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.Inf(1)}, {1, math.Inf(1)}},
invalid: true,
},
{
q: quantile{{0, 100}, {1, math.Inf(-1)}},
invalid: true,
},
{
q: quantile{{0, math.Inf(-1)}, {1, 200}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.Inf(-1)}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.Inf(-1)}, {1, math.Inf(-1)}},
invalid: true,
},
// Invalid quantiles with NaN and Inf.
{
q: quantile{{0, math.Inf(-1)}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, math.Inf(1)}, {0.5, 103}, {0.625, 103.25}, {0.75, math.NaN()}, {0.875, math.Inf(1)}, {0.875, math.Inf(-1)}, {0.875, 403.75}, {1, 104}},
invalid: true,
},
}
for i, tc := range testCases {
t.Run(strconv.Itoa(i), func(t *testing.T) {
invalid := tc.q.isInvalid()
if invalid != tc.invalid {
t.Errorf("test case %d expected invalid to be %v, but was %v", i, tc.invalid, invalid)
}
})
}
}
6 changes: 1 addition & 5 deletions pkg/sql/stats/simple_linear_regression_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,7 @@ func TestQuantileSimpleLinearRegression(t *testing.T) {
for i, tc := range testCases {
t.Run(strconv.Itoa(i), func(t *testing.T) {
yn, r2 := quantileSimpleLinearRegression(tc.x, tc.y, tc.xn)
var err error
yn, err = yn.fixMalformed()
if err != nil {
t.Errorf("test case %d had an unexpected error: %s", i, err)
}
yn = yn.fixMalformed()
if !reflect.DeepEqual(yn, tc.yn) {
t.Errorf("test case %d incorrect yn %v expected %v", i, yn, tc.yn)
}
Expand Down

0 comments on commit 1afac4f

Please sign in to comment.