Skip to content

Commit

Permalink
opt: fix bug with histograms and multi-span index constraints
Browse files Browse the repository at this point in the history
Prior to this commit, filtering a histogram with a multi-span index
constraint could lead to incorrect results if the histogram column
was part of the exact prefix of the constraint. This was due to the
fact that the same value appeared in multiple spans, breaking the
assumption of the histogram code that values in spans were always
ordered and non-overlapping. For example, filtering a histogram on
column b with the constraint /b/c/: [/1/2 - /1/2] [/1/4 - /1/4] would
return an incorrect result, because the values in the matching
histogram bucket would be counted twice.

This commit fixes the problem by only considering a single span if
the column is part of the exact prefix.

Release note (performance improvement): Fixed a bug in the
histogram logic in the optimizer which was causing an over-estimate
for the cardinality of constrained index scans in some cases when
multiple columns of the index were constrained. This problem was
introduced early in the development for the 20.2 release so should
not have ever been part of a release. The fix improves the optimizer's
cardinality estimates so may result in better query plan selection.
  • Loading branch information
rytaft committed May 15, 2020
1 parent 61f6b0f commit 1b17389
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
4 changes: 2 additions & 2 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2659,7 +2659,7 @@ func (sb *statisticsBuilder) applyIndexConstraint(
inputStat, _ := sb.colStatFromInput(colSet, e)
inputHist := inputStat.Histogram
if inputHist != nil {
if _, ok := inputHist.CanFilter(c); ok {
if _, _, ok := inputHist.CanFilter(c); ok {
s := &relProps.Stats
if colStat, ok := s.ColStats.Lookup(colSet); ok {
colStat.Histogram = inputHist.Filter(c)
Expand Down Expand Up @@ -2722,7 +2722,7 @@ func (sb *statisticsBuilder) applyConstraintSet(
inputStat, _ := sb.colStatFromInput(cols, e)
inputHist := inputStat.Histogram
if inputHist != nil {
if _, ok := inputHist.CanFilter(c); ok {
if _, _, ok := inputHist.CanFilter(c); ok {
if colStat, ok := s.ColStats.Lookup(cols); ok {
colStat.Histogram = inputHist.Filter(c)
histCols.UnionWith(cols)
Expand Down
19 changes: 13 additions & 6 deletions pkg/sql/opt/props/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,23 +165,24 @@ func maxDistinctValuesInRange(lowerBound, upperBound tree.Datum) (_ float64, ok
// CanFilter returns true if the given constraint can filter the histogram.
// This is the case if the histogram column matches one of the columns in
// the exact prefix of c or the next column immediately after the exact prefix.
// Returns the offset of the matching column in the constraint if found.
func (h *Histogram) CanFilter(c *constraint.Constraint) (colOffset int, ok bool) {
exactPrefix := c.ExactPrefix(h.evalCtx)
// Returns the offset of the matching column in the constraint if found, as
// well as the exact prefix.
func (h *Histogram) CanFilter(c *constraint.Constraint) (colOffset, exactPrefix int, ok bool) {
exactPrefix = c.ExactPrefix(h.evalCtx)
constrainedCols := c.ConstrainedColumns(h.evalCtx)
for i := 0; i < constrainedCols && i <= exactPrefix; i++ {
if c.Columns.Get(i).ID() == h.col {
return i, true
return i, exactPrefix, true
}
}
return 0, false
return 0, exactPrefix, false
}

// Filter filters the histogram according to the given constraint, and returns
// a new histogram with the results. CanFilter should be called first to
// validate that c can filter the histogram.
func (h *Histogram) Filter(c *constraint.Constraint) *Histogram {
colOffset, ok := h.CanFilter(c)
colOffset, exactPrefix, ok := h.CanFilter(c)
if !ok {
panic(errors.AssertionFailedf("column mismatch"))
}
Expand Down Expand Up @@ -253,6 +254,12 @@ func (h *Histogram) Filter(c *constraint.Constraint) *Histogram {

// For the remaining buckets and spans, use a variation on merge sort.
for spanIndex < spanCount {
if spanIndex > 0 && colOffset < exactPrefix {
// If this column is part of the exact prefix, we don't need to look at
// the rest of the spans.
break
}

// Convert the bucket to a span in order to take advantage of the
// constraint library.
left := makeSpanFromBucket(&iter, prefix)
Expand Down
15 changes: 13 additions & 2 deletions pkg/sql/opt/props/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func TestCanFilter(t *testing.T) {
h.Init(&evalCtx, opt.ColumnID(1), []cat.HistogramBucket{})
for _, tc := range testData {
c := constraint.ParseConstraint(&evalCtx, tc.constraint)
colIdx, ok := h.CanFilter(&c)
colIdx, _, ok := h.CanFilter(&c)
if ok != tc.canFilter {
t.Fatalf(
"for constraint %s, expected canFilter=%v but found %v", tc.constraint, tc.canFilter, ok,
Expand Down Expand Up @@ -288,6 +288,17 @@ func TestHistogram(t *testing.T) {
maxDistinct: 1,
distinct: 1,
},
{
constraint: "/2/1/3: [/1/40/2 - /1/40/2] [/1/40/4 - /1/40/4] [/1/40/6 - /1/40/6]",
// 0 5.7143
// <---- 40 -
buckets: []cat.HistogramBucket{
{NumRange: 0, NumEq: 5.71, DistinctRange: 0, UpperBound: tree.NewDInt(40)},
},
count: 5.71,
maxDistinct: 1,
distinct: 1,
},
}

for i := range testData {
Expand All @@ -296,7 +307,7 @@ func TestHistogram(t *testing.T) {

// Make sure all test cases work with both ascending and descending columns.
for _, c := range ascAndDesc {
if _, ok := h.CanFilter(&c); !ok {
if _, _, ok := h.CanFilter(&c); !ok {
t.Fatalf("constraint %s cannot filter histogram %v", c.String(), *h)
}
filtered := h.Filter(&c)
Expand Down

0 comments on commit 1b17389

Please sign in to comment.