Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: fix the inappropriate heuristic rule to estimate the EQ selectivity when out of range #18543

Merged
merged 7 commits into from
Aug 5, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions cmd/explaintest/r/explain_union_scan.result
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ Limit_20 10.00 root offset:0, count:10
└─HashJoin_22 10.00 root left outer join, equal:[eq(test.city.province_id, test.city.province_id)]
├─Limit_25(Build) 10.00 root offset:0, count:10
│ └─IndexJoin_38 10.00 root inner join, inner:UnionScan_37, outer key:test.city.id, inner key:test.city.id
│ ├─UnionScan_47(Build) 10.33 root
│ │ └─TableReader_49 10.33 root data:TableFullScan_48
│ │ └─TableFullScan_48 10.33 cop[tikv] table:t2 keep order:false
│ └─UnionScan_37(Probe) 0.97 root gt(test.city.province_id, 1), lt(test.city.province_id, 100)
│ └─IndexLookUp_36 0.97 root
│ ├─UnionScan_47(Build) 10.00 root
│ │ └─TableReader_49 10.00 root data:TableFullScan_48
│ │ └─TableFullScan_48 10.00 cop[tikv] table:t2 keep order:false
│ └─UnionScan_37(Probe) 1.00 root gt(test.city.province_id, 1), lt(test.city.province_id, 100)
│ └─IndexLookUp_36 1.00 root
│ ├─IndexRangeScan_33(Build) 1.00 cop[tikv] table:t1, index:PRIMARY(id) range: decided by [eq(test.city.id, test.city.id)], keep order:false
│ └─Selection_35(Probe) 0.97 cop[tikv] gt(test.city.province_id, 1), lt(test.city.province_id, 100)
│ └─Selection_35(Probe) 1.00 cop[tikv] gt(test.city.province_id, 1), lt(test.city.province_id, 100)
│ └─TableRowIDScan_34 1.00 cop[tikv] table:t1 keep order:false
└─UnionScan_57(Probe) 519304.44 root gt(test.city.province_id, 1), lt(test.city.province_id, 100), not(isnull(test.city.province_id))
└─TableReader_60 519304.44 root data:Selection_59
└─Selection_59 519304.44 cop[tikv] gt(test.city.province_id, 1), lt(test.city.province_id, 100), not(isnull(test.city.province_id))
└─UnionScan_57(Probe) 536284.00 root gt(test.city.province_id, 1), lt(test.city.province_id, 100), not(isnull(test.city.province_id))
└─TableReader_60 536284.00 root data:Selection_59
└─Selection_59 536284.00 cop[tikv] gt(test.city.province_id, 1), lt(test.city.province_id, 100), not(isnull(test.city.province_id))
└─TableFullScan_58 536284.00 cop[tikv] table:t3 keep order:false
commit;
4 changes: 2 additions & 2 deletions planner/core/cbo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,8 @@ func (s *testAnalyzeSuite) TestOutdatedAnalyze(c *C) {
c.Assert(h.Update(dom.InfoSchema()), IsNil)
statistics.RatioOfPseudoEstimate.Store(10.0)
testKit.MustQuery("explain select * from t where a <= 5 and b <= 5").Check(testkit.Rows(
"TableReader_7 35.91 root data:Selection_6",
"└─Selection_6 35.91 cop[tikv] le(test.t.a, 5), le(test.t.b, 5)",
"TableReader_7 29.77 root data:Selection_6",
"└─Selection_6 29.77 cop[tikv] le(test.t.a, 5), le(test.t.b, 5)",
" └─TableFullScan_5 80.00 cop[tikv] table:t keep order:false",
))
statistics.RatioOfPseudoEstimate.Store(0.7)
Expand Down
13 changes: 7 additions & 6 deletions planner/core/testdata/analyze_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -347,17 +347,18 @@
{
"SQL": "explain select * from t where a = 7639902",
"Plan": [
"IndexReader_6 2.03 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 2.03 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
"IndexReader_6 6.68 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
]
},
{
"SQL": "explain select c, b from t where a = 7639902 order by b asc limit 6",
"Plan": [
"Projection_7 2.03 root test.t.c, test.t.b",
"└─TopN_10 2.03 root test.t.b, offset:0, count:6",
" └─IndexReader_18 2.03 root index:IndexRangeScan_17",
" └─IndexRangeScan_17 2.03 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
"Projection_7 6.00 root test.t.c, test.t.b",
"└─TopN_8 6.00 root test.t.b, offset:0, count:6",
" └─IndexReader_16 6.00 root index:TopN_15",
" └─TopN_15 6.00 cop[tikv] test.t.b, offset:0, count:6",
" └─IndexRangeScan_14 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
]
}
]
Expand Down
4 changes: 2 additions & 2 deletions statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1524,8 +1524,8 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) {
sql: "select * from t where a = 2 and b > 10",
hist: "column:2 ndv:20 totColSize:20\n" +
"num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0\n" +
"num: 6 lower_bound: 7 upper_bound: 14 repeats: 0\n" +
"num: 7 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0",
"num: 4 lower_bound: 7 upper_bound: 14 repeats: 0\n" +
"num: 5 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0",
rangeID: tblInfo.Columns[1].ID,
idxID: tblInfo.Indices[0].ID,
eqCount: 3,
Expand Down
8 changes: 4 additions & 4 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, mo
return 0.0, nil
}
if c.NDV > 0 && c.outOfRange(val) {
return float64(modifyCount) / float64(c.NDV), nil
return outOfRangeEQSelectivity(c.NDV, modifyCount) * c.TotalRowCount(), nil
}
if c.CMSketch != nil {
count, err := c.CMSketch.queryValue(sc, val)
Expand Down Expand Up @@ -829,7 +829,7 @@ func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
// The interval case.
cnt := c.BetweenRowCount(lowVal, highVal)
if (c.outOfRange(lowVal) && !lowVal.IsNull()) || c.outOfRange(highVal) {
cnt += float64(modifyCount) / outOfRangeBetweenRate
cnt += outOfRangeEQSelectivity(outOfRangeBetweenRate, modifyCount) * c.TotalRowCount()
}
// `betweenRowCount` returns count for [l, h) range, we adjust cnt for boundaries here.
// Note that, `cnt` does not include null values, we need specially handle cases
Expand Down Expand Up @@ -891,7 +891,7 @@ func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte, modifyCo
}
val := types.NewBytesDatum(b)
if idx.NDV > 0 && idx.outOfRange(val) {
return float64(modifyCount) / (float64(idx.NDV)), nil
return outOfRangeEQSelectivity(idx.NDV, modifyCount) * idx.TotalRowCount(), nil
}
if idx.CMSketch != nil {
return float64(idx.CMSketch.QueryBytes(b)), nil
Expand Down Expand Up @@ -943,7 +943,7 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*range
totalCount += idx.BetweenRowCount(l, r)
lowIsNull := bytes.Equal(lb, nullKeyBytes)
if (idx.outOfRange(l) && !(isSingleCol && lowIsNull)) || idx.outOfRange(r) {
totalCount += float64(modifyCount) / outOfRangeBetweenRate
totalCount += outOfRangeEQSelectivity(outOfRangeBetweenRate, modifyCount) * idx.TotalRowCount()
}
if isSingleCol && lowIsNull {
totalCount += float64(idx.NullCount)
Expand Down
6 changes: 3 additions & 3 deletions statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,15 +396,15 @@ func (s *testStatsSuite) TestEstimationForUnknownValues(c *C) {
colID := table.Meta().Columns[0].ID
count, err := statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(30, 30))
c.Assert(err, IsNil)
c.Assert(count, Equals, 2.0)
c.Assert(count, Equals, 0.2)

count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(9, 30))
c.Assert(err, IsNil)
c.Assert(count, Equals, 4.2)
c.Assert(count, Equals, 2.4000000000000004)

count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(9, math.MaxInt64))
c.Assert(err, IsNil)
c.Assert(count, Equals, 4.2)
c.Assert(count, Equals, 2.4000000000000004)

idxID := table.Meta().Indices[0].ID
count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(30, 30))
Expand Down
23 changes: 17 additions & 6 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,21 @@ func isSingleColIdxNullRange(idx *Index, ran *ranger.Range) bool {
return false
}

// outOfRangeEQSelectivity estimates the selectivity (a fraction of rows, not a
// row count) of an equality condition whose value lies outside the range
// covered by the histogram.
//
// ndv is the number of distinct values to assume for the column or index, and
// modifyRows is the number of rows modified since the statistics were built.
// The result is 0 when modifyRows is 0, otherwise 1/max(ndv, outOfRangeBetweenRate).
func outOfRangeEQSelectivity(ndv, modifyRows int64) float64 {
	// With no modifications the histogram already describes all of the data,
	// so a value outside of it cannot match any row.
	if modifyRows == 0 {
		return 0
	}
	// The estimate is simply 1/NDV. The magic number outOfRangeBetweenRate acts
	// as a floor for NDV so that a small NDV does not produce an unreasonably
	// large selectivity.
	effectiveNDV := int64(outOfRangeBetweenRate)
	if ndv >= effectiveNDV {
		effectiveNDV = ndv
	}
	// TODO: After extracting TopN from histograms, we can minus the TopN fraction here.
	// Please see https://github.com/pingcap/tidb/issues/18461 for more details.
	return 1 / float64(effectiveNDV)
}

// getEqualCondSelectivity gets the selectivity of the equal conditions.
func (coll *HistColl) getEqualCondSelectivity(idx *Index, bytes []byte, usedColsLen int) float64 {
coverAll := len(idx.Info.Columns) == usedColsLen
Expand All @@ -404,8 +419,7 @@ func (coll *HistColl) getEqualCondSelectivity(idx *Index, bytes []byte, usedCols
// When the value is out of range, we could not find this value in the CM Sketch,
// so we use heuristic methods to estimate the selectivity.
if idx.NDV > 0 && coverAll {
// for equality queries
return float64(coll.ModifyCount) / float64(idx.NDV) / idx.TotalRowCount()
return outOfRangeEQSelectivity(idx.NDV, coll.ModifyCount)
}
// The equal condition only uses prefix columns of the index.
colIDs := coll.Idx2ColumnIDs[idx.ID]
Expand All @@ -416,10 +430,7 @@ func (coll *HistColl) getEqualCondSelectivity(idx *Index, bytes []byte, usedCols
}
ndv = mathutil.MaxInt64(ndv, coll.Columns[colID].NDV)
}
if ndv > 0 {
return float64(coll.ModifyCount) / float64(ndv) / idx.TotalRowCount()
}
return float64(coll.ModifyCount) / outOfRangeBetweenRate / idx.TotalRowCount()
return outOfRangeEQSelectivity(ndv, coll.ModifyCount)
}
return float64(idx.CMSketch.QueryBytes(bytes)) / float64(idx.TotalRowCount())
}
Expand Down