Skip to content

Commit

Permalink
stats: specially handle unique key when estimating (#13354) (#13385)
Browse files Browse the repository at this point in the history
  • Loading branch information
alivxxx authored and sre-bot committed Nov 12, 2019
1 parent 76a9f53 commit e318b05
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 4 deletions.
2 changes: 1 addition & 1 deletion statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ func (q *QueryFeedback) recalculateExpectCount(h *Handle) error {
expected *= idx.getIncreaseFactor(t.Count)
} else {
c := t.Columns[id]
expected, err = c.getColumnRowCount(sc, ranges, t.ModifyCount)
expected, err = c.getColumnRowCount(sc, ranges, t.ModifyCount, true)
expected *= c.getIncreaseFactor(t.Count)
}
if err != nil {
Expand Down
12 changes: 11 additions & 1 deletion statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, mo
}

// getColumnRowCount estimates the row count by a slice of Range.
func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64) (float64, error) {
func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, pkIsHandle bool) (float64, error) {
var rowCount float64
for _, rg := range ranges {
cmp, err := rg.LowVal[0].CompareDatum(sc, &rg.HighVal[0])
Expand All @@ -816,6 +816,11 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
if cmp == 0 {
// the point case.
if !rg.LowExclude && !rg.HighExclude {
// In this case, the row count is at most 1.
if pkIsHandle {
rowCount += 1
continue
}
var cnt float64
cnt, err = c.equalRowCount(sc, rg.LowVal[0], modifyCount)
if err != nil {
Expand Down Expand Up @@ -909,6 +914,11 @@ func (idx *Index) getRowCount(sc *stmtctx.StatementContext, indexRanges []*range
continue
}
if fullLen {
// At most 1 in this case.
if idx.Info.Unique {
totalCount += 1
continue
}
count, err := idx.equalRowCount(sc, lb, modifyCount)
if err != nil {
return 0, err
Expand Down
33 changes: 33 additions & 0 deletions statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,40 @@ func (s *testSelectivitySuite) TestEstimationForUnknownValues(c *C) {
c.Assert(count, Equals, 0.0)
}

// TestEstimationUniqueKeyEqualConds verifies that an equality (point) range is
// estimated as exactly one row, both when looked up through the unique index
// on column b and when looked up through GetRowCountByIntColumnRanges.
func (s *testSelectivitySuite) TestEstimationUniqueKeyEqualConds(c *C) {
	defer cleanEnv(c, s.store, s.dom)

	// Build a seven-row table with a unique key on b and collect statistics.
	tk := testkit.NewTestKit(c, s.store)
	for _, stmt := range []string{
		"use test",
		"drop table if exists t",
		"create table t(a int, b int, c int, unique key(b))",
		"insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7)",
		"analyze table t",
	} {
		tk.MustExec(stmt)
	}

	tbl, err := s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	tblStats := s.dom.StatsHandle().GetTableStats(tbl.Meta())

	stmtCtx := &stmtctx.StatementContext{}

	// Every estimate in this test must succeed and report a single row.
	expectOneRow := func(rows float64, estErr error) {
		c.Assert(estErr, IsNil)
		c.Assert(rows, Equals, 1.0)
	}

	// Point lookups through the unique index (the table's first index).
	uniqueIdxID := tbl.Meta().Indices[0].ID
	expectOneRow(tblStats.GetRowCountByIndexRanges(stmtCtx, uniqueIdxID, getRange(7, 7)))
	expectOneRow(tblStats.GetRowCountByIndexRanges(stmtCtx, uniqueIdxID, getRange(6, 6)))

	// Point lookups through the integer-column estimation path, using the
	// table's first column.
	firstColID := tbl.Meta().Columns[0].ID
	expectOneRow(tblStats.GetRowCountByIntColumnRanges(stmtCtx, firstColID, getRange(7, 7)))
	expectOneRow(tblStats.GetRowCountByIntColumnRanges(stmtCtx, firstColID, getRange(6, 6)))
}

func (s *testSelectivitySuite) TestPrimaryKeySelectivity(c *C) {
defer cleanEnv(c, s.store, s.dom)
testKit := testkit.NewTestKit(c, s.store)
testKit.MustExec("use test")
testKit.MustExec("drop table if exists t")
Expand Down
10 changes: 8 additions & 2 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext,
return getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.Count)), nil
}
c := coll.Columns[colID]
result, err := c.getColumnRowCount(sc, intRanges, coll.ModifyCount)
result, err := c.getColumnRowCount(sc, intRanges, coll.ModifyCount, true)
result *= c.getIncreaseFactor(coll.Count)
return result, errors.Trace(err)
}
Expand All @@ -427,7 +427,7 @@ func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, co
return getPseudoRowCountByColumnRanges(sc, float64(coll.Count), colRanges, 0)
}
c := coll.Columns[colID]
result, err := c.getColumnRowCount(sc, colRanges, coll.ModifyCount)
result, err := c.getColumnRowCount(sc, colRanges, coll.ModifyCount, false)
result *= c.getIncreaseFactor(coll.Count)
return result, errors.Trace(err)
}
Expand Down Expand Up @@ -546,6 +546,12 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idxID int64
totalCount := float64(0)
for _, ran := range indexRanges {
rangePosition := getOrdinalOfRangeCond(sc, ran)
coverAll := len(ran.LowVal) == len(idx.Info.Columns) && rangePosition == len(ran.LowVal)
// In this case, the row count is at most 1.
if coverAll && idx.Info.Unique {
totalCount += 1.0
continue
}
// If first one is range, just use the previous way to estimate; if it is [NULL, NULL] range
// on single-column index, use previous way as well, because CMSketch does not contain null
// values in this case.
Expand Down

0 comments on commit e318b05

Please sign in to comment.