Skip to content

Commit

Permalink
Merge pull request #113881 from DrewKimball/backport23.1.12-rc.FROZEN…
Browse files Browse the repository at this point in the history
…-113712

release-23.1.12-rc.FROZEN: sql/stats: don't use linear regression with NaN for stats forecasting
  • Loading branch information
DrewKimball authored Nov 7, 2023
2 parents ea8b908 + 5d0e071 commit d7e9824
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 27 deletions.
1 change: 0 additions & 1 deletion pkg/sql/stats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ go_library(
"//pkg/util/hlc",
"//pkg/util/intsets",
"//pkg/util/log",
"//pkg/util/log/logcrash",
"//pkg/util/mon",
"//pkg/util/protoutil",
"//pkg/util/stop",
Expand Down
12 changes: 4 additions & 8 deletions pkg/sql/stats/forecast.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
)
Expand Down Expand Up @@ -273,7 +272,7 @@ func forecastColumnStatistics(
// histogram. NOTE: If any of the observed histograms were for inverted
// indexes this will produce an incorrect histogram.
if observed[0].HistogramData != nil && observed[0].HistogramData.ColumnType != nil {
hist, err := predictHistogram(ctx, sv, observed, forecastAt, minRequiredFit, nonNullRowCount)
hist, err := predictHistogram(ctx, observed, forecastAt, minRequiredFit, nonNullRowCount)
if err != nil {
// If we did not successfully predict a histogram then copy the latest
// histogram so we can adjust it.
Expand Down Expand Up @@ -359,7 +358,6 @@ func forecastColumnStatistics(
// predictHistogram tries to predict the histogram at forecast time.
func predictHistogram(
ctx context.Context,
sv *settings.Values,
observed []*TableStatistic,
forecastAt float64,
minRequiredFit float64,
Expand Down Expand Up @@ -412,12 +410,10 @@ func predictHistogram(
// Construct a linear regression model of quantile functions over time, and
// use it to predict a quantile function at the given time.
yₙ, r2 := quantileSimpleLinearRegression(createdAts, quantiles, forecastAt)
var err error
yₙ, err = yₙ.fixMalformed()
if err != nil {
logcrash.ReportOrPanic(ctx, sv, "unable to fix malformed stats quantile: %v", yₙ)
return histogram{}, err
if yₙ.isInvalid() {
return histogram{}, errors.Newf("predicted histogram contains overflow values")
}
yₙ = yₙ.fixMalformed()
log.VEventf(
ctx, 3, "forecast for table %v columns %v predicted quantile %v R² %v",
tableID, columnIDs, yₙ, r2,
Expand Down
20 changes: 16 additions & 4 deletions pkg/sql/stats/quantile.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,10 +656,12 @@ func (q quantile) integrateSquared() float64 {
// This function fixes the negative slope pieces by "moving" p from the
// overlapping places to where it should be. No p is lost in the making of these
// calculations.
func (q quantile) fixMalformed() (quantile, error) {
//
// fixMalformed should not be called without checking that isInvalid() is false.
func (q quantile) fixMalformed() quantile {
// Check for the happy case where q is already well-formed.
if q.isWellFormed() {
return q, nil
return q
}

// To fix a malformed quantile function, we recalculate p for each distinct
Expand Down Expand Up @@ -804,7 +806,7 @@ func (q quantile) fixMalformed() (quantile, error) {
// (> v) and we always add two intersectionPs for "double roots" touching
// our line from above (one endpoint > v, one endpoint = v).
if len(intersectionPs) == 0 {
return q, errors.New("unable to fix malformed stats quantile")
return q
}
lessEqP := intersectionPs[0]
for j := 1; j < len(intersectionPs); j += 2 {
Expand All @@ -821,7 +823,17 @@ func (q quantile) fixMalformed() (quantile, error) {
fixed = append(fixed, quantilePoint{p: lessEqP, v: val})
}
}
return fixed, nil
return fixed
}

// isInvalid returns true if q contains NaN or Inf values.
func (q quantile) isInvalid() bool {
for i := range q {
if math.IsNaN(q[i].v) || math.IsInf(q[i].v, 0) {
return true
}
}
return false
}

// isWellFormed returns true if q is well-formed (i.e. is non-decreasing in v).
Expand Down
114 changes: 106 additions & 8 deletions pkg/sql/stats/quantile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1348,10 +1348,7 @@ func TestQuantileOps(t *testing.T) {
if intSqMult0 != 0 {
t.Errorf("test case %d incorrect intSqMult0 %v expected 0", i, intSqMult0)
}
fixed, err := tc.q.fixMalformed()
if err != nil {
t.Errorf("test case %d has an unexpected error: %s", i, err)
}
fixed := tc.q.fixMalformed()
if !reflect.DeepEqual(fixed, tc.fixed) {
t.Errorf("test case %d incorrect fixed %v expected %v", i, fixed, tc.fixed)
}
Expand Down Expand Up @@ -1410,10 +1407,7 @@ func TestQuantileOpsRandom(t *testing.T) {
if intSqMult0 != 0 {
t.Errorf("seed %v quantile %v incorrect intSqMult0 %v expected 0", seed, q, intSqMult0)
}
fixed, err := q.fixMalformed()
if err != nil {
t.Errorf("seed %v quantile %v had an unexpected error: %s", seed, q, err)
}
fixed := q.fixMalformed()
intSqFixed := fixed.integrateSquared()
// This seems like it should run into floating point errors, but it hasn't
// yet, so yay?
Expand All @@ -1423,3 +1417,107 @@ func TestQuantileOpsRandom(t *testing.T) {
})
}
}

// TestQuantileInvalid tests the quantile.invalid() method.
func TestQuantileInvalid(t *testing.T) {
testCases := []struct {
q quantile
invalid bool
}{
// Valid quantiles.
{
q: zeroQuantile,
},
{
q: quantile{{0, 100}, {1, 200}},
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, 400}},
},
{
q: quantile{{0, -811}, {0.125, -811}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
},
{
q: quantile{{0, 102}, {0.125, 102.25}, {0.25, 202.5}, {0.375, 302.75}, {0.375, 202.75}, {0.5, 103}, {0.625, 103.25}, {0.75, 3.5}, {0.875, 3.75}, {0.875, 103.75}, {0.875, 403.75}, {1, 104}},
},
// Invalid quantiles with NaN.
{
q: quantile{{0, 100}, {1, math.NaN()}},
invalid: true,
},
{
q: quantile{{0, math.NaN()}, {1, 200}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.NaN()}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.NaN()}, {1, math.NaN()}},
invalid: true,
},
{
q: quantile{{0, math.NaN()}, {0.125, -811}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
invalid: true,
},
{
q: quantile{{0, math.NaN()}, {0.125, math.NaN()}, {0.125, -813}, {0.25, -813}, {0.25, -815}, {1, -821}, {1, -721}},
invalid: true,
},
{
q: quantile{{0, 102}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, 202.75}, {0.5, 103}, {0.625, 103.25}, {0.75, 3.5}, {0.875, 3.75}, {0.875, 103.75}, {0.875, 403.75}, {1, 104}},
invalid: true,
},
{
q: quantile{{0, 102}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, math.NaN()}, {0.5, 103}, {0.625, math.NaN()}, {0.75, 3.5}, {0.875, 3.75}, {0.875, math.NaN()}, {0.875, 403.75}, {1, 104}},
invalid: true,
},
// Invalid quantiles with Inf.
{
q: quantile{{0, 100}, {1, math.Inf(1)}},
invalid: true,
},
{
q: quantile{{0, math.Inf(1)}, {1, 200}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.Inf(1)}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.Inf(1)}, {1, math.Inf(1)}},
invalid: true,
},
{
q: quantile{{0, 100}, {1, math.Inf(-1)}},
invalid: true,
},
{
q: quantile{{0, math.Inf(-1)}, {1, 200}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, 400}, {1, math.Inf(-1)}},
invalid: true,
},
{
q: quantile{{0, 100}, {0, 200}, {0, 200}, {0.5, 300}, {0.5, 300}, {0.5, math.Inf(-1)}, {1, math.Inf(-1)}},
invalid: true,
},
// Invalid quantiles with NaN and Inf.
{
q: quantile{{0, math.Inf(-1)}, {0.125, 102.25}, {0.25, math.NaN()}, {0.375, 302.75}, {0.375, math.Inf(1)}, {0.5, 103}, {0.625, 103.25}, {0.75, math.NaN()}, {0.875, math.Inf(1)}, {0.875, math.Inf(-1)}, {0.875, 403.75}, {1, 104}},
invalid: true,
},
}
for i, tc := range testCases {
t.Run(strconv.Itoa(i), func(t *testing.T) {
invalid := tc.q.isInvalid()
if invalid != tc.invalid {
t.Errorf("test case %d expected invalid to be %v, but was %v", i, tc.invalid, invalid)
}
})
}
}
6 changes: 1 addition & 5 deletions pkg/sql/stats/simple_linear_regression_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,7 @@ func TestQuantileSimpleLinearRegression(t *testing.T) {
for i, tc := range testCases {
t.Run(strconv.Itoa(i), func(t *testing.T) {
yn, r2 := quantileSimpleLinearRegression(tc.x, tc.y, tc.xn)
var err error
yn, err = yn.fixMalformed()
if err != nil {
t.Errorf("test case %d had an unexpected error: %s", i, err)
}
yn = yn.fixMalformed()
if !reflect.DeepEqual(yn, tc.yn) {
t.Errorf("test case %d incorrect yn %v expected %v", i, yn, tc.yn)
}
Expand Down
16 changes: 15 additions & 1 deletion pkg/sql/stats/stats_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/cache"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/errorutil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
Expand Down Expand Up @@ -214,10 +215,23 @@ func decodeTableStatisticsKV(
// The statistics are ordered by their CreatedAt time (newest-to-oldest).
func (sc *TableStatisticsCache) GetTableStats(
ctx context.Context, table catalog.TableDescriptor,
) ([]*TableStatistic, error) {
) (stats []*TableStatistic, err error) {
if !statsUsageAllowed(table, sc.settings) {
return nil, nil
}
defer func() {
if r := recover(); r != nil {
// In the event of a "safe" panic, we only want to log the error and
// continue executing the query without stats for this table.
if ok, e := errorutil.ShouldCatch(r); ok {
err = e
} else {
// Other panic objects can't be considered "safe" and thus are
// propagated as crashes that terminate the session.
panic(r)
}
}
}()
forecast := forecastAllowed(table, sc.settings)
return sc.getTableStatsFromCache(ctx, table.GetID(), &forecast)
}
Expand Down

0 comments on commit d7e9824

Please sign in to comment.