Merge branch 'master' into snapshot-fail-test
sticnarf authored Feb 24, 2021
2 parents ba9706e + 6d6c833 commit 16581f0
Showing 8 changed files with 155 additions and 16 deletions.
2 changes: 1 addition & 1 deletion statistics/handle/update.go
@@ -1137,7 +1137,7 @@ func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error {
expected := 0.0
if isIndex {
idx := t.Indices[id]
expected, err = idx.GetRowCount(sc, ranges, t.ModifyCount)
expected, err = idx.GetRowCount(sc, nil, ranges, t.ModifyCount)
expected *= idx.GetIncreaseFactor(t.Count)
} else {
c := t.Columns[id]
72 changes: 70 additions & 2 deletions statistics/histogram.go
@@ -1189,7 +1189,7 @@ func (idx *Index) QueryBytes(d []byte) uint64 {

// GetRowCount returns the row count of the given ranges.
// It uses the modifyCount to adjust the influence of modifications on the table.
func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*ranger.Range, modifyCount int64) (float64, error) {
func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, indexRanges []*ranger.Range, modifyCount int64) (float64, error) {
totalCount := float64(0)
isSingleCol := len(idx.Info.Columns) == 1
for _, indexRange := range indexRanges {
@@ -1226,21 +1226,89 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*range
}
l := types.NewBytesDatum(lb)
r := types.NewBytesDatum(rb)
totalCount += idx.BetweenRowCount(l, r)
lowIsNull := bytes.Equal(lb, nullKeyBytes)
if (idx.outOfRange(l) && !(isSingleCol && lowIsNull)) || idx.outOfRange(r) {
totalCount += outOfRangeEQSelectivity(outOfRangeBetweenRate, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount()
}
if isSingleCol && lowIsNull {
totalCount += float64(idx.NullCount)
}
// Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything
// if the first column's range is a point.
if rangePosition := GetOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer == Version2 && coll != nil {
expBackoffSel, err := idx.expBackoffEstimation(sc, coll, indexRange)
if err != nil {
return 0, err
}
totalCount += expBackoffSel * idx.TotalRowCount()
} else {
totalCount += idx.BetweenRowCount(l, r)
}
}
if totalCount > idx.TotalRowCount() {
totalCount = idx.TotalRowCount()
}
return totalCount, nil
}

// expBackoffEstimation estimates the multi-column cases following the exponential backoff approach. See the comment below for details.
func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistColl, indexRange *ranger.Range) (float64, error) {
tmpRan := []*ranger.Range{
{
LowVal: make([]types.Datum, 1),
HighVal: make([]types.Datum, 1),
},
}
colsIDs := coll.Idx2ColumnIDs[idx.ID]
singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
// The following code uses exponential backoff to reduce the impact of the independence assumption. It works as follows:
// 1. Calculate the selectivity of each column.
// 2. Sort them and pick the 4 most selective filters, with selectivities sel_1, sel_2, sel_3, sel_4 where i < j => sel_i <= sel_j.
// 3. The final selectivity would be sel_1 * sel_2^{1/2} * sel_3^{1/4} * sel_4^{1/8}.
// This calculation reduces the reliance on the independence assumption and usually works better than it.
for i := 0; i < len(indexRange.LowVal); i++ {
tmpRan[0].LowVal[0] = indexRange.LowVal[i]
tmpRan[0].HighVal[0] = indexRange.HighVal[i]
if i == len(indexRange.LowVal)-1 {
tmpRan[0].LowExclude = indexRange.LowExclude
tmpRan[0].HighExclude = indexRange.HighExclude
}
colID := colsIDs[i]
var (
count float64
err error
)
if anotherIdxID, ok := coll.ColID2IdxID[colID]; ok && anotherIdxID != idx.ID {
count, err = coll.GetRowCountByIndexRanges(sc, anotherIdxID, tmpRan)
} else if col, ok := coll.Columns[colID]; ok && !col.IsInvalid(sc, coll.Pseudo) {
count, err = coll.GetRowCountByColumnRanges(sc, colID, tmpRan)
} else {
continue
}
if err != nil {
return 0, err
}
singleColumnEstResults = append(singleColumnEstResults, count)
}
// Sort them.
sort.Slice(singleColumnEstResults, func(i, j int) bool {
return singleColumnEstResults[i] < singleColumnEstResults[j]
})
l := len(singleColumnEstResults)
// Convert the first 4 to selectivity results.
for i := 0; i < l && i < 4; i++ {
singleColumnEstResults[i] = singleColumnEstResults[i] / float64(coll.Count)
}
if l == 1 {
return singleColumnEstResults[0], nil
} else if l == 2 {
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]), nil
} else if l == 3 {
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])), nil
}
return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])) * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3]))), nil
}

type countByRangeFunc = func(*stmtctx.StatementContext, int64, []*ranger.Range) (float64, error)

// newHistogramBySelectivity fulfills the content of new histogram by the given selectivity result.
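To make the estimation above concrete, here is a minimal standalone sketch of the exponential-backoff combination that expBackoffEstimation performs. It is not TiDB code: the helper name and the inputs are hypothetical, and in the real implementation the per-column row counts come from GetRowCountByIndexRanges / GetRowCountByColumnRanges.

```go
package main

import (
	"fmt"
	"math"
	"sort"
)

// expBackoffSelectivity combines per-column row-count estimates into one
// selectivity using exponents 1, 1/2, 1/4, 1/8 on the (at most) four most
// selective columns. rowCounts and tableRows are hypothetical inputs.
func expBackoffSelectivity(rowCounts []float64, tableRows float64) float64 {
	sels := make([]float64, len(rowCounts))
	for i, c := range rowCounts {
		sels[i] = c / tableRows
	}
	sort.Float64s(sels) // smallest (most selective) first
	sel := 1.0
	for i := 0; i < len(sels) && i < 4; i++ {
		sel *= math.Pow(sels[i], 1/math.Exp2(float64(i))) // exponents 1, 1/2, 1/4, 1/8
	}
	return sel
}

func main() {
	// Three hypothetical column estimates on a 100-row table:
	// 10, 50 and 80 matching rows give roughly 6.7 expected rows after backoff.
	fmt.Printf("%.2f\n", expBackoffSelectivity([]float64{10, 50, 80}, 100)*100)
}
```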
35 changes: 32 additions & 3 deletions statistics/integration_test.go
@@ -19,27 +19,31 @@ import (
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/util/testkit"
"github.com/pingcap/tidb/util/testleak"
"github.com/pingcap/tidb/util/testutil"
)

var _ = Suite(&testIntegrationSuite{})

type testIntegrationSuite struct {
store kv.Storage
do *domain.Domain
store kv.Storage
do *domain.Domain
testData testutil.TestData
}

func (s *testIntegrationSuite) SetUpSuite(c *C) {
testleak.BeforeTest()
// Add the hook here to avoid a data race.
var err error
s.store, s.do, err = newStoreWithBootstrap()
c.Assert(err, IsNil)
s.testData, err = testutil.LoadTestSuiteData("testdata", "integration_suite")
c.Assert(err, IsNil)
}

func (s *testIntegrationSuite) TearDownSuite(c *C) {
s.do.Close()
c.Assert(s.store.Close(), IsNil)
testleak.AfterTest(c)()
c.Assert(s.testData.GenerateOutputIfNeeded(), IsNil)
}

func (s *testIntegrationSuite) TestChangeVerTo2Behavior(c *C) {
@@ -185,3 +189,28 @@ func (s *testIntegrationSuite) TestIncAnalyzeOnVer2(c *C) {
"test t idx 1 4 4",
))
}

func (s *testIntegrationSuite) TestExpBackoffEstimation(c *C) {
defer cleanEnv(c, s.store, s.do)
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("create table exp_backoff(a int, b int, c int, d int, index idx(a, b, c, d))")
tk.MustExec("insert into exp_backoff values(1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 2, 3), (1, 2, 2, 4), (1, 2, 3, 5)")
tk.MustExec("set @@session.tidb_analyze_version=2")
tk.MustExec("analyze table exp_backoff")
var (
input []string
output [][]string
)
s.testData.GetTestCases(c, &input, &output)
// The test cases are:
// Query a = 1, b = 1, c = 1, and d >= 3 and d <= 5 separately. We get row counts 5, 3, 2, 3.
// Then query a = 1 and b = 1 and c = 1 and d >= 3 and d <= 5. Its result should follow the exp backoff,
// which is 2/5 * (3/5)^{1/2} * (3/5)^{1/4} * 1^{1/8} * 5 ≈ 1.3634.
for i := 0; i < len(input); i++ {
s.testData.OnRecord(func() {
output[i] = s.testData.ConvertRowsToStrings(tk.MustQuery(input[i]).Rows())
})
tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
}
}
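As a quick, hedged sanity check of the arithmetic in the comment above (not part of the test itself), the exp-backoff formula applied to the counts 5, 3, 2, 3 on the 5-row table reproduces the 1.36-row estimate recorded in integration_suite_out.json below:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Selectivities sorted most selective first: 2/5, 3/5, 3/5, 5/5.
	sel := (2.0 / 5) * math.Sqrt(3.0/5) * math.Pow(3.0/5, 0.25) * math.Pow(5.0/5, 0.125)
	fmt.Printf("%.2f\n", sel*5) // prints 1.36, matching the IndexRangeScan row estimate
}
```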
4 changes: 2 additions & 2 deletions statistics/table.go
@@ -339,7 +339,7 @@ func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idx
if idx.CMSketch != nil && idx.StatsVer == Version1 {
result, err = coll.getIndexRowCount(sc, idxID, indexRanges)
} else {
result, err = idx.GetRowCount(sc, indexRanges, coll.ModifyCount)
result, err = idx.GetRowCount(sc, coll, indexRanges, coll.ModifyCount)
}
result *= idx.GetIncreaseFactor(coll.Count)
return result, errors.Trace(err)
@@ -575,7 +575,7 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idxID int64
// on single-column index, use previous way as well, because CMSketch does not contain null
// values in this case.
if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) {
count, err := idx.GetRowCount(sc, []*ranger.Range{ran}, coll.ModifyCount)
count, err := idx.GetRowCount(sc, nil, []*ranger.Range{ran}, coll.ModifyCount)
if err != nil {
return 0, errors.Trace(err)
}
12 changes: 12 additions & 0 deletions statistics/testdata/integration_suite_in.json
@@ -0,0 +1,12 @@
[
{
"name": "TestExpBackoffEstimation",
"cases": [
"explain select * from exp_backoff where a = 1",
"explain select * from exp_backoff where b = 1",
"explain select * from exp_backoff where c = 1",
"explain select * from exp_backoff where d >= 3 and d <= 5",
"explain select * from exp_backoff where a = 1 and b = 1 and c = 1 and d >= 3 and d<= 5"
]
}
]
30 changes: 30 additions & 0 deletions statistics/testdata/integration_suite_out.json
@@ -0,0 +1,30 @@
[
{
"Name": "TestExpBackoffEstimation",
"Cases": [
[
"IndexReader_6 5.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 5.00 cop[tikv] table:exp_backoff, index:idx(a, b, c, d) range:[1,1], keep order:false"
],
[
"TableReader_7 3.00 root data:Selection_6",
"└─Selection_6 3.00 cop[tikv] eq(test.exp_backoff.b, 1)",
" └─TableFullScan_5 5.00 cop[tikv] table:exp_backoff keep order:false"
],
[
"TableReader_7 2.00 root data:Selection_6",
"└─Selection_6 2.00 cop[tikv] eq(test.exp_backoff.c, 1)",
" └─TableFullScan_5 5.00 cop[tikv] table:exp_backoff keep order:false"
],
[
"TableReader_7 3.00 root data:Selection_6",
"└─Selection_6 3.00 cop[tikv] ge(test.exp_backoff.d, 3), le(test.exp_backoff.d, 5)",
" └─TableFullScan_5 5.00 cop[tikv] table:exp_backoff keep order:false"
],
[
"IndexReader_6 1.36 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.36 cop[tikv] table:exp_backoff, index:idx(a, b, c, d) range:[1 1 1 3,1 1 1 5], keep order:false"
]
]
}
]
12 changes: 6 additions & 6 deletions statistics/testdata/stats_suite_out.json
@@ -318,8 +318,8 @@
" └─TableFullScan_5 4.00 cop[tikv] table:tdatetime keep order:false"
],
[
"TableReader_7 1.33 root data:Selection_6",
"└─Selection_6 1.33 cop[tikv] eq(test.tint.b, 1)",
"TableReader_7 1.00 root data:Selection_6",
"└─Selection_6 1.00 cop[tikv] eq(test.tint.b, 1)",
" └─TableFullScan_5 8.00 cop[tikv] table:tint keep order:false"
],
[
@@ -328,8 +328,8 @@
" └─TableFullScan_5 8.00 cop[tikv] table:tint keep order:false"
],
[
"TableReader_7 1.01 root data:Selection_6",
"└─Selection_6 1.01 cop[tikv] eq(test.tint.b, 8)",
"TableReader_7 1.00 root data:Selection_6",
"└─Selection_6 1.00 cop[tikv] eq(test.tint.b, 8)",
" └─TableFullScan_5 8.00 cop[tikv] table:tint keep order:false"
],
[
@@ -405,8 +405,8 @@
"└─TableRangeScan_5 3.00 cop[tikv] table:ct1 range:[\"6\",\"8\"], keep order:false"
],
[
"TableReader_6 0.00 root data:TableRangeScan_5",
"└─TableRangeScan_5 0.00 cop[tikv] table:ct2 range:[1 1,1 8], keep order:false"
"TableReader_6 1.00 root data:TableRangeScan_5",
"└─TableRangeScan_5 1.00 cop[tikv] table:ct2 range:[1 1,1 8], keep order:false"
],
[
"TableReader_6 1.00 root data:TableRangeScan_5",
4 changes: 2 additions & 2 deletions util/sqlexec/utils_test.go
@@ -276,8 +276,8 @@ func (s *testUtilsSuite) TestEscapeSQL(c *C) {
{
name: "time 3",
input: "select %?",
params: []interface{}{time.Unix(0, 888888888)},
output: "select '1970-01-01 08:00:00.888888'",
params: []interface{}{time.Unix(0, 888888888).UTC()},
output: "select '1970-01-01 00:00:00.888888'",
err: "",
},
{
