Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: unify the calculation of pseudo stats. #6483

Merged
merged 7 commits into from
May 9, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions plan/cbo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,8 +495,8 @@ func (s *testAnalyzeSuite) TestOutdatedAnalyze(c *C) {
testKit.MustQuery("explain select * from t where a <= 5 and b <= 5").Check(testkit.Rows(
"IndexScan_8 cop table:t, index:a, range:[-inf,5], keep order:false 26.59",
"TableScan_9 Selection_10 cop table:t, keep order:false 26.59",
"Selection_10 TableScan_9 cop le(test.t.b, 5) 26.67",
"IndexLookUp_11 root index:IndexScan_8, table:Selection_10 26.67",
"Selection_10 TableScan_9 cop le(test.t.b, 5) 8.84",
"IndexLookUp_11 root index:IndexScan_8, table:Selection_10 8.84",
))
}

Expand Down
8 changes: 4 additions & 4 deletions plan/physical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSimpleCase(c *C) {
// Test TopN to table branch in double read.
{
sql: "select * from t where t.c = 1 and t.e = 1 order by t.b limit 1",
best: "IndexLookUp(Index(t.c_d_e)[[1,1]]->Sel([eq(test.t.e, 1)]), Table(t)->TopN([test.t.b],0,1))->TopN([test.t.b],0,1)",
best: "IndexLookUp(Index(t.c_d_e)[[1,1]]->Sel([eq(test.t.e, 1)]), Table(t))->TopN([test.t.b],0,1)",
},
// Test Null Range
{
Expand All @@ -92,7 +92,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSimpleCase(c *C) {
// Test TopN to index branch in double read.
{
sql: "select * from t where t.c = 1 and t.e = 1 order by t.e limit 1",
best: "IndexLookUp(Index(t.c_d_e)[[1,1]]->Sel([eq(test.t.e, 1)])->TopN([test.t.e],0,1), Table(t))->TopN([test.t.e],0,1)",
best: "IndexLookUp(Index(t.c_d_e)[[1,1]]->Sel([eq(test.t.e, 1)]), Table(t))->TopN([test.t.e],0,1)",
},
// Test TopN to Limit in double read.
{
Expand Down Expand Up @@ -765,7 +765,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderAgg(c *C) {
// Test hash agg + index double.
{
sql: "select sum(e), avg(b + c) from t where c = 1 and e = 1 group by d",
best: "IndexLookUp(Index(t.c_d_e)[[1,1]]->Sel([eq(test.t.e, 1)]), Table(t)->HashAgg)->HashAgg",
best: "IndexLookUp(Index(t.c_d_e)[[1,1]]->Sel([eq(test.t.e, 1)]), Table(t))->HashAgg",
},
// Test stream agg + index double.
{
Expand All @@ -775,7 +775,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderAgg(c *C) {
// Test hash agg + order.
{
sql: "select sum(e) as k, avg(b + c) from t where c = 1 and b = 1 and e = 1 group by d order by k",
best: "IndexLookUp(Index(t.c_d_e)[[1,1]]->Sel([eq(test.t.e, 1)]), Table(t)->Sel([eq(test.t.b, 1)])->HashAgg)->HashAgg->Sort",
best: "IndexLookUp(Index(t.c_d_e)[[1,1]]->Sel([eq(test.t.e, 1)]), Table(t)->Sel([eq(test.t.b, 1)]))->HashAgg->Sort",
},
// Test stream agg + order.
{
Expand Down
2 changes: 2 additions & 0 deletions plan/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ import (

"github.com/juju/errors"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/statistics"
log "github.com/sirupsen/logrus"
)

// statsInfo stores the basic information of statistics for the plan's output. It is used for cost estimation.
type statsInfo struct {
count float64
cardinality []float64
colHist []statistics.Column
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that this field is not used?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, it's something that should be done in the next PR.

}

func (s *statsInfo) String() string {
Expand Down
12 changes: 6 additions & 6 deletions statistics/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expressio
}
// TODO: If len(exprs) is bigger than 63, we could use bitset structure to replace the int64.
// This will simplify some code and speed up if we use this rather than a boolean slice.
if t.Pseudo || len(exprs) > 63 || (len(t.Columns) == 0 && len(t.Indices) == 0) {
if len(exprs) > 63 || (len(t.Columns) == 0 && len(t.Indices) == 0) {
return pseudoSelectivity(t, exprs), nil
}
var sets []*exprSet
Expand All @@ -142,8 +142,7 @@ func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expressio
extractedCols = expression.ExtractColumnsFromExpressions(extractedCols, exprs, nil)
for _, colInfo := range t.Columns {
col := expression.ColInfo2Col(extractedCols, colInfo.Info)
// This column should have histogram.
if col != nil && !t.ColumnIsInvalid(ctx.GetSessionVars().StmtCtx, col.ID) {
if col != nil {
maskCovered, ranges, err := getMaskAndRanges(ctx, exprs, ranger.ColumnRangeType, nil, col)
if err != nil {
return 0, errors.Trace(err)
Expand All @@ -156,8 +155,7 @@ func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expressio
}
for _, idxInfo := range t.Indices {
idxCols, lengths := expression.IndexInfo2Cols(extractedCols, idxInfo.Info)
// This index should have histogram.
if len(idxCols) > 0 && idxInfo.Histogram.Len() > 0 {
if len(idxCols) > 0 {
maskCovered, ranges, err := getMaskAndRanges(ctx, exprs, ranger.IndexRangeType, lengths, idxCols...)
if err != nil {
return 0, errors.Trace(err)
Expand All @@ -176,7 +174,9 @@ func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expressio
err error
)
switch set.tp {
case pkType, colType:
case pkType:
rowCount, err = t.GetRowCountByIntColumnRanges(sc, set.ID, set.ranges)
case colType:
rowCount, err = t.GetRowCountByColumnRanges(sc, set.ID, set.ranges)
case indexType:
rowCount, err = t.GetRowCountByIndexRanges(sc, set.ID, set.ranges)
Expand Down