diff --git a/go.sum b/go.sum index c5c27bdca6c6c..17f64067a1eab 100644 --- a/go.sum +++ b/go.sum @@ -465,6 +465,7 @@ github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sasha-s/go-deadlock v0.2.0/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= +github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44 h1:tB9NOR21++IjLyVx3/PCPhWMwqGNCMQEH96A6dMZ/gc= github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shirou/gopsutil v2.19.10+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shirou/gopsutil v3.20.12+incompatible h1:6VEGkOXP/eP4o2Ilk8cSsX0PhOEfX6leqAnD+urrp9M= diff --git a/statistics/histogram.go b/statistics/histogram.go index d5a1a22742e1d..58df8dd1a22bc 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -23,6 +23,7 @@ import ( "unsafe" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/parser/charset" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" @@ -1233,15 +1234,20 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, inde if isSingleCol && lowIsNull { totalCount += float64(idx.NullCount) } + expBackoffSuccess := false // Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything. // If the first column's range is point. 
if rangePosition := GetOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer == Version2 && coll != nil { - expBackoffSel, err := idx.expBackoffEstimation(sc, coll, indexRange) + var expBackoffSel float64 + expBackoffSel, expBackoffSuccess, err = idx.expBackoffEstimation(sc, coll, indexRange) if err != nil { return 0, err } - totalCount += expBackoffSel * idx.TotalRowCount() - } else { + if expBackoffSuccess { + totalCount += expBackoffSel * idx.TotalRowCount() + } + } + if !expBackoffSuccess { totalCount += idx.BetweenRowCount(l, r) } } @@ -1252,7 +1258,7 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, inde } // expBackoffEstimation estimate the multi-col cases following the Exponential Backoff. See comment below for details. -func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistColl, indexRange *ranger.Range) (float64, error) { +func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistColl, indexRange *ranger.Range) (float64, bool, error) { tmpRan := []*ranger.Range{ { LowVal: make([]types.Datum, 1), @@ -1286,7 +1292,7 @@ func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistC continue } if err != nil { - return 0, err + return 0, false, err } singleColumnEstResults = append(singleColumnEstResults, count) } @@ -1299,14 +1305,20 @@ func (idx *Index) expBackoffEstimation(sc *stmtctx.StatementContext, coll *HistC for i := 0; i < l && i < 4; i++ { singleColumnEstResults[i] = singleColumnEstResults[i] / float64(coll.Count) } + failpoint.Inject("cleanEstResults", func() { + singleColumnEstResults = singleColumnEstResults[:0] + l = 0 + }) if l == 1 { - return singleColumnEstResults[0], nil + return singleColumnEstResults[0], true, nil } else if l == 2 { - return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]), nil + return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]), true, nil } else if l == 3 { - return 
singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])), nil + return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])), true, nil + } else if l == 0 { + return 0, false, nil } - return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])) * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3]))), nil + return singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) * math.Sqrt(math.Sqrt(singleColumnEstResults[2])) * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3]))), true, nil } type countByRangeFunc = func(*stmtctx.StatementContext, int64, []*ranger.Range) (float64, error) diff --git a/statistics/integration_test.go b/statistics/integration_test.go index 4d75569cdf41d..6bb4ae09dac36 100644 --- a/statistics/integration_test.go +++ b/statistics/integration_test.go @@ -14,6 +14,7 @@ package statistics_test import ( . "github.com/pingcap/check" + "github.com/pingcap/failpoint" "github.com/pingcap/parser/model" "github.com/pingcap/tidb/domain" "github.com/pingcap/tidb/kv" @@ -203,14 +204,23 @@ func (s *testIntegrationSuite) TestExpBackoffEstimation(c *C) { output [][]string ) s.testData.GetTestCases(c, &input, &output) + inputLen := len(input) // The test cases are: // Query a = 1, b = 1, c = 1, d >= 3 and d <= 5 separately. We got 5, 3, 2, 3. // And then query and a = 1 and b = 1 and c = 1 and d >= 3 and d <= 5. It's result should follow the exp backoff, // which is 2/5 * (3/5)^{1/2} * (3/5)*{1/4} * 1^{1/8} * 5 = 1.3634. - for i := 0; i < len(input); i++ { + for i := 0; i < inputLen-1; i++ { s.testData.OnRecord(func() { output[i] = s.testData.ConvertRowsToStrings(tk.MustQuery(input[i]).Rows()) }) tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...)) } + + // The last case simulates that no column stats are loaded: the cleanEstResults failpoint empties the single-column estimates, so the exp backoff estimation reports no result.
+ c.Assert(failpoint.Enable("github.com/pingcap/tidb/statistics/cleanEstResults", `return(true)`), IsNil) + s.testData.OnRecord(func() { + output[inputLen-1] = s.testData.ConvertRowsToStrings(tk.MustQuery(input[inputLen-1]).Rows()) + }) + tk.MustQuery(input[inputLen-1]).Check(testkit.Rows(output[inputLen-1]...)) + c.Assert(failpoint.Disable("github.com/pingcap/tidb/statistics/cleanEstResults"), IsNil) } diff --git a/statistics/testdata/integration_suite_in.json b/statistics/testdata/integration_suite_in.json index 733a1203f0c7c..61f4badc3bb72 100644 --- a/statistics/testdata/integration_suite_in.json +++ b/statistics/testdata/integration_suite_in.json @@ -6,6 +6,7 @@ "explain select * from exp_backoff where b = 1", "explain select * from exp_backoff where c = 1", "explain select * from exp_backoff where d >= 3 and d <= 5", + "explain select * from exp_backoff where a = 1 and b = 1 and c = 1 and d >= 3 and d<= 5", "explain select * from exp_backoff where a = 1 and b = 1 and c = 1 and d >= 3 and d<= 5" ] } diff --git a/statistics/testdata/integration_suite_out.json b/statistics/testdata/integration_suite_out.json index f8b3d60714869..d5f6ff224a282 100644 --- a/statistics/testdata/integration_suite_out.json +++ b/statistics/testdata/integration_suite_out.json @@ -24,6 +24,10 @@ [ "IndexReader_6 1.36 root index:IndexRangeScan_5", "└─IndexRangeScan_5 1.36 cop[tikv] table:exp_backoff, index:idx(a, b, c, d) range:[1 1 1 3,1 1 1 5], keep order:false" + ], + [ + "IndexReader_6 0.00 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 0.00 cop[tikv] table:exp_backoff, index:idx(a, b, c, d) range:[1 1 1 3,1 1 1 5], keep order:false" ] ] }