Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner, statistics: use the correct column ID when recording stats loading status (#52208) #52309

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build/nogo_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@
"fieldalignment": {
"exclude_files": {
"pkg/parser/parser.go": "parser/parser.go code",
"pkg/statistics/table.go": "disable this limitation that prevents us from splitting struct fields for clarity",
"external/": "no need to vet third party code",
".*_generated\\.go$": "ignore generated code",
".*_/testmain\\.go$": "ignore code",
Expand Down
8 changes: 4 additions & 4 deletions pkg/planner/cardinality/cross_estimation.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func crossEstimateRowCount(sctx sessionctx.Context,
if col == nil || len(path.AccessConds) > 0 {
return 0, false, corr
}
colID := col.UniqueID
colUniqueID := col.UniqueID
if corr < 0 {
desc = !desc
}
Expand All @@ -139,11 +139,11 @@ func crossEstimateRowCount(sctx sessionctx.Context,
return 0, err == nil, corr
}
idxID := int64(-1)
idxIDs, idxExists := dsStatsInfo.HistColl.ColID2IdxIDs[colID]
idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colUniqueID]
if idxExists && len(idxIDs) > 0 {
idxID = idxIDs[0]
}
rangeCounts, ok := getColumnRangeCounts(sctx, colID, ranges, dsTableStats.HistColl, idxID)
rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
if !ok {
return 0, false, corr
}
Expand All @@ -155,7 +155,7 @@ func crossEstimateRowCount(sctx sessionctx.Context,
if idxExists {
rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
} else {
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colID, convertedRanges)
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges)
}
if err != nil {
return 0, false, corr
Expand Down
24 changes: 16 additions & 8 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,23 @@ func init() {
}

// GetRowCountByColumnRanges estimates the row count by a slice of Range.
func GetRowCountByColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colID int64, colRanges []*ranger.Range) (result float64, err error) {
func GetRowCountByColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) {
var name string
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugTraceGetRowCountInput(sctx, colID, colRanges)
debugTraceGetRowCountInput(sctx, colUniqueID, colRanges)
defer func() {
debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
debugtrace.LeaveContextCommon(sctx)
}()
}
sc := sctx.GetSessionVars().StmtCtx
c, ok := coll.Columns[colID]
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
c, ok := coll.Columns[colUniqueID]
colInfoID := colUniqueID
if len(coll.UniqueID2colInfoID) > 0 {
colInfoID = coll.UniqueID2colInfoID[colUniqueID]
}
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
if c != nil && c.Info != nil {
name = c.Info.Name.O
}
Expand All @@ -71,19 +75,23 @@ func GetRowCountByColumnRanges(sctx sessionctx.Context, coll *statistics.HistCol
}

// GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
func GetRowCountByIntColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error) {
func GetRowCountByIntColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) {
var name string
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugTraceGetRowCountInput(sctx, colID, intRanges)
debugTraceGetRowCountInput(sctx, colUniqueID, intRanges)
defer func() {
debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
debugtrace.LeaveContextCommon(sctx)
}()
}
sc := sctx.GetSessionVars().StmtCtx
c, ok := coll.Columns[colID]
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
c, ok := coll.Columns[colUniqueID]
colInfoID := colUniqueID
if len(coll.UniqueID2colInfoID) > 0 {
colInfoID = coll.UniqueID2colInfoID[colUniqueID]
}
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
if c != nil && c.Info != nil {
name = c.Info.Name.O
}
Expand Down
18 changes: 9 additions & 9 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,19 +168,19 @@ func getIndexRowCountForStatsV1(sctx sessionctx.Context, coll *statistics.HistCo
}
var count float64
var err error
colIDs := coll.Idx2ColumnIDs[idxID]
var colID int64
if rangePosition >= len(colIDs) {
colID = -1
colUniqueIDs := coll.Idx2ColUniqueIDs[idxID]
var colUniqueID int64
if rangePosition >= len(colUniqueIDs) {
colUniqueID = -1
} else {
colID = colIDs[rangePosition]
colUniqueID = colUniqueIDs[rangePosition]
}
// prefer index stats over column stats
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 {
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
idxID := idxIDs[0]
count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
} else {
count, err = GetRowCountByColumnRanges(sctx, coll, colID, []*ranger.Range{&rang})
count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
}
if err != nil {
return 0, errors.Trace(err)
Expand Down Expand Up @@ -409,7 +409,7 @@ func expBackoffEstimation(sctx sessionctx.Context, idx *statistics.Index, coll *
Collators: make([]collate.Collator, 1),
},
}
colsIDs := coll.Idx2ColumnIDs[idx.Histogram.ID]
colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID]
singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
// The following code uses Exponential Backoff to reduce the impact of the independence assumption. It works like:
// 1. Calc the selectivity of each column.
Expand All @@ -434,7 +434,7 @@ func expBackoffEstimation(sctx sessionctx.Context, idx *statistics.Index, coll *
foundStats = true
count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan)
}
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
// Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call
// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
// check avoids infinite recursion.
Expand Down
8 changes: 4 additions & 4 deletions pkg/planner/cardinality/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func Selectivity(
slices.Sort(idxIDs)
for _, id := range idxIDs {
idxStats := coll.Indices[id]
idxCols := findPrefixOfIndexByCol(extractedCols, coll.Idx2ColumnIDs[id], id2Paths[idxStats.ID])
idxCols := findPrefixOfIndexByCol(extractedCols, coll.Idx2ColUniqueIDs[id], id2Paths[idxStats.ID])
if len(idxCols) > 0 {
lengths := make([]int, 0, len(idxCols))
for i := 0; i < len(idxCols) && i < len(idxStats.Info.Columns); i++ {
Expand Down Expand Up @@ -777,7 +777,7 @@ func findAvailableStatsForCol(sctx sessionctx.Context, coll *statistics.HistColl
return false, uniqueID
}
// try to find available stats in single column index stats (except for prefix index)
for idxStatsIdx, cols := range coll.Idx2ColumnIDs {
for idxStatsIdx, cols := range coll.Idx2ColUniqueIDs {
if len(cols) == 1 && cols[0] == uniqueID {
idxStats, ok := coll.Indices[idxStatsIdx]
if ok &&
Expand Down Expand Up @@ -826,7 +826,7 @@ func getEqualCondSelectivity(sctx sessionctx.Context, coll *statistics.HistColl,
return outOfRangeEQSelectivity(sctx, idx.NDV, coll.RealtimeCount, int64(idx.TotalRowCount())), nil
}
// The equal condition only uses prefix columns of the index.
colIDs := coll.Idx2ColumnIDs[idx.ID]
colIDs := coll.Idx2ColUniqueIDs[idx.ID]
var ndv int64
for i, colID := range colIDs {
if i >= usedColsLen {
Expand Down Expand Up @@ -908,7 +908,7 @@ func crossValidationSelectivity(
}()
}
minRowCount = math.MaxFloat64
cols := coll.Idx2ColumnIDs[idx.ID]
cols := coll.Idx2ColUniqueIDs[idx.ID]
crossValidationSelectivity = 1.0
totalRowCount := idx.TotalRowCount()
for i, colID := range cols {
Expand Down
4 changes: 2 additions & 2 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -893,8 +893,8 @@ func generateMapsForMockStatsTbl(statsTbl *statistics.Table) {
for _, idxIDs := range colID2IdxIDs {
slices.Sort(idxIDs)
}
statsTbl.Idx2ColumnIDs = idx2Columns
statsTbl.ColID2IdxIDs = colID2IdxIDs
statsTbl.Idx2ColUniqueIDs = idx2Columns
statsTbl.ColUniqueID2IdxIDs = colID2IdxIDs
}

func TestIssue39593(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/casetest/planstats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 4,
shard_count = 5,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
45 changes: 45 additions & 0 deletions pkg/planner/core/casetest/planstats/plan_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,3 +400,48 @@ func TestCollectDependingVirtualCols(t *testing.T) {
require.Equal(t, output[i].OutputColNames, cols)
}
}

// TestPartialStatsInExplain runs the recorded EXPLAIN statements for this test
// against a plain table (t), a single-column table (t2) and a range-partitioned
// table (tp), and compares every result with the expected rows stored in the
// plan-stats test data.
func TestPartialStatsInExplain(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// Build the schema and load a few rows into each table.
	setupStmts := []string{
		"use test",
		"create table t(a int, b int, c int, primary key(a), key idx(b))",
		"insert into t values (1,1,1),(2,2,2),(3,3,3)",
		"create table t2(a int, primary key(a))",
		"insert into t2 values (1),(2),(3)",
		"create table tp(a int, b int, c int, index ic(c)) partition by range(a)" +
			"(partition p0 values less than (10)," +
			"partition p1 values less than (20)," +
			"partition p2 values less than maxvalue)",
		"insert into tp values (1,1,1),(2,2,2),(13,13,13),(14,14,14),(25,25,25),(36,36,36)",
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	// Override the stats lease for the duration of the test and restore the
	// previous value when the test returns.
	prevLease := dom.StatsHandle().Lease()
	dom.StatsHandle().SetLease(1)
	defer func() {
		dom.StatsHandle().SetLease(prevLease)
	}()

	// Collect statistics for every table, then refresh the stats handle from
	// the current info schema.
	analyzeStmts := []string{
		"analyze table t",
		"analyze table t2",
		"analyze table tp",
	}
	for _, stmt := range analyzeStmts {
		tk.MustExec(stmt)
	}
	tk.RequireNoError(dom.StatsHandle().Update(dom.InfoSchema()))

	// NOTE(review): this warm-up EXPLAIN runs before sync wait is set to 0,
	// presumably so stats for tp are touched once with the default setting;
	// confirm the intent against the stats-loading code.
	tk.MustQuery("explain select * from tp where a = 1")
	tk.MustExec("set @@tidb_stats_load_sync_wait = 0")

	var input []string
	var output []struct {
		Query  string
		Result []string
	}
	planStatsData := GetPlanStatsData()
	planStatsData.LoadTestCases(t, &input, &output)

	for i := range input {
		sql := input[i]
		// In record mode, capture the current result as the new expectation.
		testdata.OnRecord(func() {
			output[i].Query = input[i]
			output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows())
		})
		expected := testkit.Rows(output[i].Result...)
		tk.MustQuery(sql).Check(expected)
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,13 @@
]
}
]
},
{
"name": "TestPartialStatsInExplain",
"cases": [
"explain format = brief select * from tp where b = 10",
"explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c"
]
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -101,5 +101,47 @@
]
}
]
},
{
"Name": "TestPartialStatsInExplain",
"Cases": [
{
"Query": "explain format = brief select * from tp where b = 10",
"Result": [
"TableReader 0.01 root partition:all data:Selection",
"└─Selection 0.01 cop[tikv] eq(test.tp.b, 10)",
" └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[ic:allEvicted, b:allEvicted]"
]
},
{
"Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"Result": [
"Projection 0.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c",
"└─HashJoin 0.00 root inner join, equal:[eq(test.tp.c, test.t.b)]",
" ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
" │ └─Selection 0.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))",
" │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[ic:allEvicted, c:allEvicted]",
" └─TableReader(Probe) 3.00 root data:Selection",
" └─Selection 3.00 cop[tikv] not(isnull(test.t.b))",
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
]
},
{
"Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
"Result": [
"HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]",
"├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
"│ ├─TableReader(Build) 0.33 root data:Selection",
"│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))",
"│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
"│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
"│ ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))",
"│ │ └─IndexRangeScan 1.00 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[ic:allEvicted, c:allEvicted]",
"│ └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[ic:allEvicted, c:allEvicted]",
"└─TableReader(Probe) 1.00 root data:TableRangeScan",
" └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
]
}
]
}
]
2 changes: 1 addition & 1 deletion pkg/planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1211,7 +1211,7 @@ func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.Hist
// 2. Try to get NDV from index stats.
// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
for idxID, idxCols := range histColl.Idx2ColumnIDs {
for idxID, idxCols := range histColl.Idx2ColUniqueIDs {
if len(idxCols) != len(colUIDs) {
continue
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1830,8 +1830,8 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
path.IdxCols = append(path.IdxCols, handleCol)
path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength)
// Also updates the map that maps the index id to its prefix column ids.
if len(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID]) == len(path.Index.Columns) {
ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID], handleCol.UniqueID)
if len(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) {
ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID)
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property.
tbl := ds.tableStats.HistColl
ndvs := make([]property.GroupNDV, 0, len(colGroups))
for idxID, idx := range tbl.Indices {
colsLen := len(tbl.Idx2ColumnIDs[idxID])
// tbl.Idx2ColumnIDs may only contain the prefix of index columns.
colsLen := len(tbl.Idx2ColUniqueIDs[idxID])
// tbl.Idx2ColUniqueIDs may only contain the prefix of index columns.
// But it may exceed the number of index columns, since the index would contain the handle column if it's not a unique index.
// We append the handle at fillIndexPath.
if colsLen < len(idx.Info.Columns) {
Expand All @@ -185,7 +185,7 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property.
colsLen--
}
idxCols := make([]int64, colsLen)
copy(idxCols, tbl.Idx2ColumnIDs[idxID])
copy(idxCols, tbl.Idx2ColUniqueIDs[idxID])
slices.Sort(idxCols)
for _, g := range colGroups {
// We only want those exact matches.
Expand Down
Loading