Skip to content

Commit

Permalink
statistics: make exponential backoff estimation more safe (#23086)
Browse files Browse the repository at this point in the history
  • Loading branch information
winoros authored Mar 4, 2021
1 parent bee6d37 commit 263155a
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 10 deletions.
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sasha-s/go-deadlock v0.2.0/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44 h1:tB9NOR21++IjLyVx3/PCPhWMwqGNCMQEH96A6dMZ/gc=
github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/shirou/gopsutil v2.19.10+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/shirou/gopsutil v3.20.12+incompatible h1:6VEGkOXP/eP4o2Ilk8cSsX0PhOEfX6leqAnD+urrp9M=
Expand Down
30 changes: 21 additions & 9 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"unsafe"

"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/parser/charset"
"github.com/pingcap/parser/model"
"github.com/pingcap/parser/mysql"
Expand Down Expand Up @@ -1233,15 +1234,20 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, inde
if isSingleCol && lowIsNull {
totalCount += float64(idx.NullCount)
}
expBackoffSuccess := false
// Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything
// if the first column's range is a point.
if rangePosition := GetOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer == Version2 && coll != nil {
expBackoffSel, err := idx.expBackoffEstimation(sc, coll, indexRange)
var expBackoffSel float64
expBackoffSel, expBackoffSuccess, err = idx.expBackoffEstimation(sc, coll, indexRange)
if err != nil {
return 0, err
}
totalCount += expBackoffSel * idx.TotalRowCount()
} else {
if expBackoffSuccess {
totalCount += expBackoffSel * idx.TotalRowCount()
}
}
if !expBackoffSuccess {
totalCount += idx.BetweenRowCount(l, r)
}
}
Expand All @@ -1252,7 +1258,7 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, inde
}

// expBackoffEstimation estimates the multi-column cases following the Exponential Backoff. See comment below for details.
func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistColl, indexRange *ranger.Range) (float64, error) {
func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistColl, indexRange *ranger.Range) (float64, bool, error) {
tmpRan := []*ranger.Range{
{
LowVal: make([]types.Datum, 1),
Expand Down Expand Up @@ -1286,7 +1292,7 @@ func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistC
continue
}
if err != nil {
return 0, err
return 0, false, err
}
singleColumnEstResults = append(singleColumnEstResults, count)
}
Expand All @@ -1299,14 +1305,20 @@ func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistC
for i := 0; i < l && i < 4; i++ {
singleColumnEstResults[i] = singleColumnEstResults[i] / float64(coll.Count)
}
failpoint.Inject("cleanEstResults", func() {
singleColumnEstResults = singleColumnEstResults[:0]
l = 0
})
if l == 1 {
return singleColumnEstResults[0], nil
return singleColumnEstResults[0], true, nil
} else if l == 2 {
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]), nil
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]), true, nil
} else if l == 3 {
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])), nil
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])), true, nil
} else if l == 0 {
return 0, false, nil
}
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])) * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3]))), nil
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])) * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3]))), true, nil
}

type countByRangeFunc = func(*stmtctx.StatementContext, int64, []*ranger.Range) (float64, error)
Expand Down
12 changes: 11 additions & 1 deletion statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ package statistics_test

import (
. "github.com/pingcap/check"
"github.com/pingcap/failpoint"
"github.com/pingcap/parser/model"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/kv"
Expand Down Expand Up @@ -203,14 +204,23 @@ func (s *testIntegrationSuite) TestExpBackoffEstimation(c *C) {
output [][]string
)
s.testData.GetTestCases(c, &input, &output)
inputLen := len(input)
// The test cases are:
// Query a = 1, b = 1, c = 1, d >= 3 and d <= 5 separately. We got 5, 3, 2, 3.
// And then query a = 1 and b = 1 and c = 1 and d >= 3 and d <= 5. Its result should follow the exp backoff,
// which is 2/5 * (3/5)^{1/2} * (3/5)^{1/4} * 1^{1/8} * 5 = 1.3634.
for i := 0; i < len(input); i++ {
for i := 0; i < inputLen-1; i++ {
s.testData.OnRecord(func() {
output[i] = s.testData.ConvertRowsToStrings(tk.MustQuery(input[i]).Rows())
})
tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
}

// The last case is that no column is loaded and we get no stats at all.
c.Assert(failpoint.Enable("github.com/pingcap/tidb/statistics/cleanEstResults", `return(true)`), IsNil)
s.testData.OnRecord(func() {
output[inputLen-1] = s.testData.ConvertRowsToStrings(tk.MustQuery(input[inputLen-1]).Rows())
})
tk.MustQuery(input[inputLen-1]).Check(testkit.Rows(output[inputLen-1]...))
c.Assert(failpoint.Disable("github.com/pingcap/tidb/statistics/cleanEstResults"), IsNil)
}
1 change: 1 addition & 0 deletions statistics/testdata/integration_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"explain select * from exp_backoff where b = 1",
"explain select * from exp_backoff where c = 1",
"explain select * from exp_backoff where d >= 3 and d <= 5",
"explain select * from exp_backoff where a = 1 and b = 1 and c = 1 and d >= 3 and d<= 5",
"explain select * from exp_backoff where a = 1 and b = 1 and c = 1 and d >= 3 and d<= 5"
]
}
Expand Down
4 changes: 4 additions & 0 deletions statistics/testdata/integration_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
[
"IndexReader_6 1.36 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.36 cop[tikv] table:exp_backoff, index:idx(a, b, c, d) range:[1 1 1 3,1 1 1 5], keep order:false"
],
[
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:exp_backoff, index:idx(a, b, c, d) range:[1 1 1 3,1 1 1 5], keep order:false"
]
]
}
Expand Down

0 comments on commit 263155a

Please sign in to comment.