Skip to content

Commit

Permalink
Merge #82474 #82501
Browse files Browse the repository at this point in the history
82474: sql/stats: support rowCountEq = 0 in histogram.adjustCounts r=rytaft,mgartner,msirek a=michae2

The predicted histograms in statistics forecasts will often have buckets
with NumEq = 0, and some predicted histograms will have _all_ buckets
with NumEq = 0. This wasn't possible before forecasting, because the
histograms produced by `EquiDepthHistogram` never have any buckets with
NumEq = 0.

If `adjustCounts` is called on such a histogram, `rowCountEq` and
`distinctCountEq` will be zero. `adjustCounts` should still be able to
fix such a histogram to have sum(NumRange) = rowCountTotal and
sum(DistinctRange) = distinctCountTotal. This patch teaches
`adjustCounts` to handle these histograms.

(Similarly, predicted histograms could have all buckets with
NumRange = 0, but this is already possible for histograms produced by
`EquiDepthHistogram`, so `adjustCounts` already handles these.)

Also, add a few more comments to `adjustCounts`.

Assists: #79872

Release note: None

82501: sql/storageparam: break builtins dep on tabledesc r=ajwerner a=ajwerner

The TableStorageParamObserver meant that paramparse and transitively builtins
depended on tabledesc. This was unfortunate because we want seqexpr is depended
on by builtins and we want to use seqexpr in tabledesc, so we have to make
sure that builtins does not depend on tabledesc.

This commit achieves that goal by splitting out paramparse into three new
packages from paramparse:
 * sql/storageparam: defines the interface for the Setter, contains functions
   to drive forward the setting and resetting of params, and has some shared
   functionality.
 * sql/storageparam/indexstorageparam: implementation of storageparam.Setter
   for the `descpb.IndexDescriptor`.
 * sql/storageparam/tablestorageparam: implementation of storageparam.Setter
   for the `*tabledesc.Mutable`.

This allows the `builtins` package to use the `indexstorageparam` package
cleanly without depending on `*tabledesc.Mutable`. It also recognizes that
lots of utility methods in `paramparse` aren't about `storageparam`s.

Release note: None

Co-authored-by: Michael Erickson <[email protected]>
Co-authored-by: Andrew Werner <[email protected]>
  • Loading branch information
3 people committed Jun 7, 2022
3 parents 5bd3fdd + 3f2da1a + ab5b9bb commit 8344e69
Show file tree
Hide file tree
Showing 16 changed files with 530 additions and 357 deletions.
3 changes: 3 additions & 0 deletions pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,9 @@ go_library(
"//pkg/sql/sqlutil",
"//pkg/sql/stats",
"//pkg/sql/stmtdiagnostics",
"//pkg/sql/storageparam",
"//pkg/sql/storageparam/indexstorageparam",
"//pkg/sql/storageparam/tablestorageparam",
"//pkg/sql/types",
"//pkg/sql/vtable",
"//pkg/storage/enginepb",
Expand Down
11 changes: 6 additions & 5 deletions pkg/sql/alter_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/resolver"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/schemaexpr"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc"
"github.com/cockroachdb/cockroach/pkg/sql/paramparse"
"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
Expand All @@ -43,6 +42,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam/tablestorageparam"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
Expand Down Expand Up @@ -727,12 +728,12 @@ func (n *alterTableNode) startExec(params runParams) error {
if ttl := n.tableDesc.GetRowLevelTTL(); ttl != nil {
ttlBefore = protoutil.Clone(ttl).(*catpb.RowLevelTTL)
}
if err := paramparse.SetStorageParameters(
if err := storageparam.Set(
params.ctx,
params.p.SemaCtx(),
params.EvalContext(),
t.StorageParams,
paramparse.NewTableStorageParamObserver(n.tableDesc),
tablestorageparam.NewSetter(n.tableDesc),
); err != nil {
return err
}
Expand Down Expand Up @@ -761,11 +762,11 @@ func (n *alterTableNode) startExec(params runParams) error {
if ttl := n.tableDesc.GetRowLevelTTL(); ttl != nil {
ttlBefore = protoutil.Clone(ttl).(*catpb.RowLevelTTL)
}
if err := paramparse.ResetStorageParameters(
if err := storageparam.Reset(
params.ctx,
params.EvalContext(),
t.Params,
paramparse.NewTableStorageParamObserver(n.tableDesc),
tablestorageparam.NewSetter(n.tableDesc),
); err != nil {
return err
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/sql/create_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/schemaexpr"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc"
"github.com/cockroachdb/cockroach/pkg/sql/paramparse"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgnotice"
Expand All @@ -34,6 +33,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam/indexstorageparam"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
Expand Down Expand Up @@ -274,12 +275,12 @@ func makeIndexDescriptor(
return nil, err
}

if err := paramparse.SetStorageParameters(
if err := storageparam.Set(
params.ctx,
params.p.SemaCtx(),
params.EvalContext(),
n.StorageParams,
&paramparse.IndexStorageParamObserver{IndexDesc: &indexDesc},
&indexstorageparam.Setter{IndexDesc: &indexDesc},
); err != nil {
return nil, err
}
Expand Down
11 changes: 7 additions & 4 deletions pkg/sql/create_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam/indexstorageparam"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam/tablestorageparam"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
Expand Down Expand Up @@ -1313,12 +1316,12 @@ func NewTableDesc(
id, dbID, sc.GetID(), n.Table.Table(), creationTime, privileges, persistence,
)

if err := paramparse.SetStorageParameters(
if err := storageparam.Set(
ctx,
semaCtx,
evalCtx,
n.StorageParams,
paramparse.NewTableStorageParamObserver(&desc),
tablestorageparam.NewSetter(&desc),
); err != nil {
return nil, err
}
Expand Down Expand Up @@ -1848,12 +1851,12 @@ func NewTableDesc(
}
idx.Predicate = expr
}
if err := paramparse.SetStorageParameters(
if err := storageparam.Set(
ctx,
semaCtx,
evalCtx,
d.StorageParams,
&paramparse.IndexStorageParamObserver{IndexDesc: &idx},
&indexstorageparam.Setter{IndexDesc: &idx},
); err != nil {
return nil, err
}
Expand Down
12 changes: 0 additions & 12 deletions pkg/sql/paramparse/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,17 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "paramparse",
srcs = [
"paramobserver.go",
"paramparse.go",
"validation.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/paramparse",
visibility = ["//visibility:public"],
deps = [
"//pkg/geo/geoindex",
"//pkg/server/telemetry",
"//pkg/settings",
"//pkg/sql/catalog/catpb",
"//pkg/sql/catalog/descpb",
"//pkg/sql/catalog/tabledesc",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/pgwire/pgnotice",
"//pkg/sql/sem/eval",
"//pkg/sql/sem/normalize",
"//pkg/sql/sem/tree",
"//pkg/sql/sqltelemetry",
"//pkg/sql/types",
"//pkg/util/duration",
"//pkg/util/errorutil/unimplemented",
"@com_github_cockroachdb_errors//:errors",
],
)
3 changes: 1 addition & 2 deletions pkg/sql/paramparse/paramparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Package paramparse parses parameters that are set in param lists
// or session vars.
// Package paramparse provides utilities for parsing storage paramaters.
package paramparse

import (
Expand Down
7 changes: 6 additions & 1 deletion pkg/sql/sem/builtins/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ go_library(
"//pkg/sql/lex",
"//pkg/sql/lexbase",
"//pkg/sql/memsize",
"//pkg/sql/paramparse",
"//pkg/sql/parser",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
Expand All @@ -78,6 +77,8 @@ go_library(
"//pkg/sql/sqlliveness",
"//pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil",
"//pkg/sql/sqltelemetry",
"//pkg/sql/storageparam",
"//pkg/sql/storageparam/indexstorageparam",
"//pkg/sql/types",
"//pkg/streaming",
"//pkg/util",
Expand Down Expand Up @@ -174,5 +175,9 @@ disallowed_imports_test(
src = "builtins",
disallowed_list = [
"//pkg/sql/catalog/descs",
"//pkg/sql/catalog/tabledesc",
"//pkg/sql/catalog/schemadesc",
"//pkg/sql/catalog/dbdesc",
"//pkg/sql/catalog/typedesc",
],
)
7 changes: 4 additions & 3 deletions pkg/sql/sem/builtins/geo_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ import (
"github.com/cockroachdb/cockroach/pkg/geo/twkb"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/paramparse"
"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sem/volatility"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam"
"github.com/cockroachdb/cockroach/pkg/sql/storageparam/indexstorageparam"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
Expand Down Expand Up @@ -7671,12 +7672,12 @@ func applyGeoindexConfigStorageParams(
return geoindex.Config{}, errors.Newf("invalid storage parameters specified: %s", params)
}
semaCtx := tree.MakeSemaContext()
if err := paramparse.SetStorageParameters(
if err := storageparam.Set(
evalCtx.Context,
&semaCtx,
evalCtx,
stmt.AST.(*tree.CreateIndex).StorageParams,
&paramparse.IndexStorageParamObserver{IndexDesc: indexDesc},
&indexstorageparam.Setter{IndexDesc: indexDesc},
); err != nil {
return geoindex.Config{}, err
}
Expand Down
30 changes: 22 additions & 8 deletions pkg/sql/stats/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,16 @@ type histogram struct {
}

// adjustCounts adjusts the row count and number of distinct values per bucket
// based on the total row count and estimated distinct count.
// to equal the total row count and estimated distinct count. The total row
// count and estimated distinct count should not include NULL values, and the
// histogram should not contain any buckets for NULL values.
func (h *histogram) adjustCounts(evalCtx *eval.Context, rowCountTotal, distinctCountTotal float64) {
// Empty table cases.
if rowCountTotal <= 0 || distinctCountTotal <= 0 {
h.buckets = make([]cat.HistogramBucket, 0)
return
}

// Calculate the current state of the histogram so we can adjust it as needed.
// The number of rows and distinct values represented by the histogram should
// be adjusted so they equal rowCountTotal and distinctCountTotal.
Expand All @@ -189,13 +197,16 @@ func (h *histogram) adjustCounts(evalCtx *eval.Context, rowCountTotal, distinctC
}
}

if rowCountEq <= 0 {
panic(errors.AssertionFailedf("expected a positive value for rowCountEq"))
// If the histogram only had empty buckets, we can't adjust it.
if rowCountRange+rowCountEq <= 0 || distinctCountRange+distinctCountEq <= 0 {
h.buckets = make([]cat.HistogramBucket, 0)
return
}

// If the upper bounds account for all distinct values (as estimated by the
// sketch), make the histogram consistent by clearing the ranges and adjusting
// the NumEq values to add up to the row count.
// the NumEq values to add up to the row count. This might be the case for
// low-cardinality types like BOOL and ENUM or other low-cardinality data.
if distinctCountEq >= distinctCountTotal {
adjustmentFactorNumEq := rowCountTotal / rowCountEq
for i := range h.buckets {
Expand All @@ -209,7 +220,7 @@ func (h *histogram) adjustCounts(evalCtx *eval.Context, rowCountTotal, distinctC
// The upper bounds do not account for all distinct values, so adjust the
// NumEq values if needed so they add up to less than the row count.
remDistinctCount := distinctCountTotal - distinctCountEq
if rowCountEq+remDistinctCount >= rowCountTotal {
if rowCountEq > 0 && rowCountEq+remDistinctCount > rowCountTotal {
targetRowCountEq := rowCountTotal - remDistinctCount
adjustmentFactorNumEq := targetRowCountEq / rowCountEq
for i := range h.buckets {
Expand All @@ -229,10 +240,10 @@ func (h *histogram) adjustCounts(evalCtx *eval.Context, rowCountTotal, distinctC
lowerBound := h.buckets[0].UpperBound
upperBound := h.buckets[len(h.buckets)-1].UpperBound
if maxDistinct, ok := tree.MaxDistinctCount(evalCtx, lowerBound, upperBound); ok {
// Subtract distinctCountEq to account for the upper bounds of the
// Subtract number of buckets to account for the upper bounds of the
// buckets, along with the current range distinct count which has already
// been accounted for.
maxDistinctCountRange = float64(maxDistinct) - distinctCountEq - distinctCountRange
maxDistinctCountRange = float64(maxDistinct) - float64(len(h.buckets)) - distinctCountRange
}

// Add distinct values into the histogram if there is space. Increment the
Expand Down Expand Up @@ -277,7 +288,10 @@ func (h *histogram) adjustCounts(evalCtx *eval.Context, rowCountTotal, distinctC
)
}

// Adjust the values so the row counts and distinct counts add up correctly.
// At this point rowCountRange + rowCountEq >= distinctCountTotal but not
// necessarily rowCountTotal, so we've accounted for all distinct values, and
// any additional rows we add will be duplicate values. We can spread the
// final adjustment proportionately across both NumRange and NumEq.
adjustmentFactorDistinctRange := float64(1)
if distinctCountRange > 0 {
adjustmentFactorDistinctRange = (distinctCountTotal - distinctCountEq) / distinctCountRange
Expand Down
44 changes: 44 additions & 0 deletions pkg/sql/stats/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,50 @@ func TestAdjustCounts(t *testing.T) {
{NumRange: 1551.19, NumEq: 3447.09, DistinctRange: 450, UpperBound: f(1000)},
},
},
{ // Zero rowCount and distinctCount.
h: []cat.HistogramBucket{
{NumRange: 0, NumEq: 1, DistinctRange: 0, UpperBound: f(1)},
},
rowCount: 0,
distinctCount: 0,
expected: []cat.HistogramBucket{},
},
{ // Negative rowCount and distinctCount.
h: []cat.HistogramBucket{
{NumRange: 0, NumEq: 1, DistinctRange: 0, UpperBound: f(1)},
},
rowCount: -100,
distinctCount: -90,
expected: []cat.HistogramBucket{},
},
{ // Empty initial histogram.
h: []cat.HistogramBucket{},
rowCount: 1000,
distinctCount: 1000,
expected: []cat.HistogramBucket{},
},
{ // Empty bucket in initial histogram.
h: []cat.HistogramBucket{
{NumRange: 0, NumEq: 0, DistinctRange: 0, UpperBound: f(1)},
},
rowCount: 99,
distinctCount: 99,
expected: []cat.HistogramBucket{},
},
{ // All zero NumEq.
h: []cat.HistogramBucket{
{NumRange: 0, NumEq: 0, DistinctRange: 0, UpperBound: f(1)},
{NumRange: 10, NumEq: 0, DistinctRange: 5, UpperBound: f(100)},
{NumRange: 10, NumEq: 0, DistinctRange: 10, UpperBound: f(200)},
},
rowCount: 100,
distinctCount: 60,
expected: []cat.HistogramBucket{
{NumRange: 0, NumEq: 0, DistinctRange: 0, UpperBound: f(1)},
{NumRange: 50, NumEq: 0, DistinctRange: 27.5, UpperBound: f(100)},
{NumRange: 50, NumEq: 0, DistinctRange: 32.5, UpperBound: f(200)},
},
},
}

evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
Expand Down
20 changes: 20 additions & 0 deletions pkg/sql/storageparam/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
name = "storageparam",
srcs = ["storage_param.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/storageparam",
visibility = ["//visibility:public"],
deps = [
"//pkg/server/telemetry",
"//pkg/sql/paramparse",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/pgwire/pgnotice",
"//pkg/sql/sem/eval",
"//pkg/sql/sem/normalize",
"//pkg/sql/sem/tree",
"//pkg/sql/sqltelemetry",
"//pkg/sql/types",
],
)
20 changes: 20 additions & 0 deletions pkg/sql/storageparam/indexstorageparam/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
name = "indexstorageparam",
srcs = ["index_storage_param.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/storageparam/indexstorageparam",
visibility = ["//visibility:public"],
deps = [
"//pkg/geo/geoindex",
"//pkg/sql/catalog/descpb",
"//pkg/sql/paramparse",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/sem/eval",
"//pkg/sql/sem/tree",
"//pkg/sql/storageparam",
"//pkg/util/errorutil/unimplemented",
"@com_github_cockroachdb_errors//:errors",
],
)
Loading

0 comments on commit 8344e69

Please sign in to comment.