This is an automated cherry-pick of pingcap#52208
Signed-off-by: ti-chi-bot <[email protected]>
time-and-fate authored and ti-chi-bot committed Apr 2, 2024
1 parent 091d31e commit 05e413e
Showing 14 changed files with 217 additions and 32 deletions.
1 change: 1 addition & 0 deletions build/nogo_config.json
@@ -169,6 +169,7 @@
"fieldalignment": {
"exclude_files": {
"pkg/parser/parser.go": "parser/parser.go code",
"pkg/statistics/table.go": "disable this limitation that prevents us from splitting struct fields for clarity",
"external/": "no need to vet third party code",
".*_generated\\.go$": "ignore generated code",
".*_/testmain\\.go$": "ignore code",
8 changes: 4 additions & 4 deletions pkg/planner/cardinality/cross_estimation.go
@@ -126,7 +126,7 @@ func crossEstimateRowCount(sctx sessionctx.Context,
if col == nil || len(path.AccessConds) > 0 {
return 0, false, corr
}
colID := col.UniqueID
colUniqueID := col.UniqueID
if corr < 0 {
desc = !desc
}
@@ -139,11 +139,11 @@ func crossEstimateRowCount(sctx sessionctx.Context,
return 0, err == nil, corr
}
idxID := int64(-1)
idxIDs, idxExists := dsStatsInfo.HistColl.ColID2IdxIDs[colID]
idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colUniqueID]
if idxExists && len(idxIDs) > 0 {
idxID = idxIDs[0]
}
rangeCounts, ok := getColumnRangeCounts(sctx, colID, ranges, dsTableStats.HistColl, idxID)
rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
if !ok {
return 0, false, corr
}
@@ -155,7 +155,7 @@ func crossEstimateRowCount(sctx sessionctx.Context,
if idxExists {
rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
} else {
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colID, convertedRanges)
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges)
}
if err != nil {
return 0, false, corr
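The change in cross_estimation.go renames colID to colUniqueID but keeps the estimation strategy: resolve the column's optimizer-internal unique ID to the indexes that start with that column, and prefer index statistics when any exist. Below is a minimal sketch of that dispatch, using invented stand-in types rather than TiDB's real statistics.HistColl:

```go
package main

import "fmt"

// histColl is a hypothetical, simplified stand-in for TiDB's statistics collection.
type histColl struct {
	// colUniqueID2IdxIDs maps a column's optimizer-internal unique ID to the
	// IDs of indexes whose prefix starts with that column.
	colUniqueID2IdxIDs map[int64][]int64
}

// estimateRowCount mirrors the dispatch in crossEstimateRowCount: if any index
// covers the column, use the first one; otherwise fall back to column stats.
func estimateRowCount(coll *histColl, colUniqueID int64) string {
	if idxIDs, ok := coll.colUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
		return fmt.Sprintf("row count from index stats (index ID %d)", idxIDs[0])
	}
	return fmt.Sprintf("row count from column stats (column unique ID %d)", colUniqueID)
}

func main() {
	coll := &histColl{colUniqueID2IdxIDs: map[int64][]int64{7: {3}}}
	fmt.Println(estimateRowCount(coll, 7)) // covered by index 3
	fmt.Println(estimateRowCount(coll, 9)) // no index, use column stats
}
```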
37 changes: 31 additions & 6 deletions pkg/planner/cardinality/row_count_column.go
@@ -33,24 +33,37 @@ func init() {
}

// GetRowCountByColumnRanges estimates the row count by a slice of Range.
<<<<<<< HEAD
func GetRowCountByColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colID int64, colRanges []*ranger.Range) (result float64, err error) {
=======
func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) {
>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208))
var name string
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugTraceGetRowCountInput(sctx, colID, colRanges)
debugTraceGetRowCountInput(sctx, colUniqueID, colRanges)
defer func() {
debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
debugtrace.LeaveContextCommon(sctx)
}()
}
sc := sctx.GetSessionVars().StmtCtx
c, ok := coll.Columns[colID]
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
c, ok := coll.Columns[colUniqueID]
colInfoID := colUniqueID
if len(coll.UniqueID2colInfoID) > 0 {
colInfoID = coll.UniqueID2colInfoID[colUniqueID]
}
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
if c != nil && c.Info != nil {
name = c.Info.Name.O
}
<<<<<<< HEAD
if !ok || c.IsInvalid(sctx, coll.Pseudo) {
result, err = getPseudoRowCountByColumnRanges(sc, float64(coll.RealtimeCount), colRanges, 0)
=======
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
result, err = getPseudoRowCountByColumnRanges(sc.TypeCtx(), float64(coll.RealtimeCount), colRanges, 0)
>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208))
if err == nil && sc.EnableOptimizerCETrace && ok {
ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats-Pseudo", uint64(result))
}
@@ -71,23 +84,35 @@ func GetRowCountByColumnRanges(sctx sessionctx.Context, coll *statistics.HistCol
}

// GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
<<<<<<< HEAD
func GetRowCountByIntColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error) {
=======
func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) {
>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208))
var name string
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugTraceGetRowCountInput(sctx, colID, intRanges)
debugTraceGetRowCountInput(sctx, colUniqueID, intRanges)
defer func() {
debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
debugtrace.LeaveContextCommon(sctx)
}()
}
sc := sctx.GetSessionVars().StmtCtx
c, ok := coll.Columns[colID]
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
c, ok := coll.Columns[colUniqueID]
colInfoID := colUniqueID
if len(coll.UniqueID2colInfoID) > 0 {
colInfoID = coll.UniqueID2colInfoID[colUniqueID]
}
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
if c != nil && c.Info != nil {
name = c.Info.Name.O
}
<<<<<<< HEAD
if !ok || c.IsInvalid(sctx, coll.Pseudo) {
=======
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208))
if len(intRanges) == 0 {
return 0, nil
}
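The new colInfoID lines in row_count_column.go are the heart of this fix: the optimizer refers to columns by session-scoped unique IDs, while the stats-loading status should be keyed by the column ID from the table schema, so the unique ID is translated through UniqueID2colInfoID before recording. Here is a hedged sketch of that translation; the type and field names below are simplified stand-ins, not TiDB's actual API:

```go
package main

import "fmt"

// statsColl is a hypothetical model of the two ID spaces involved: the
// optimizer's per-session unique column IDs and the schema's column info IDs.
type statsColl struct {
	uniqueID2ColInfoID map[int64]int64 // stands in for HistColl.UniqueID2colInfoID
	loadStatus         map[int64]string
}

// recordUsedColumnStatus records the stats-loading status under the schema
// column ID. Recording the unique ID directly (the pre-fix behavior) would
// point at the wrong, or a nonexistent, column in the table schema.
func (c *statsColl) recordUsedColumnStatus(colUniqueID int64, status string) {
	colInfoID := colUniqueID
	if len(c.uniqueID2ColInfoID) > 0 {
		if id, ok := c.uniqueID2ColInfoID[colUniqueID]; ok {
			colInfoID = id
		}
	}
	c.loadStatus[colInfoID] = status
}

func main() {
	c := &statsColl{
		uniqueID2ColInfoID: map[int64]int64{101: 2}, // unique ID 101 is schema column 2
		loadStatus:         map[int64]string{},
	}
	c.recordUsedColumnStatus(101, "allEvicted")
	fmt.Println(c.loadStatus) // map[2:allEvicted]
}
```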
18 changes: 9 additions & 9 deletions pkg/planner/cardinality/row_count_index.go
@@ -168,19 +168,19 @@ func getIndexRowCountForStatsV1(sctx sessionctx.Context, coll *statistics.HistCo
}
var count float64
var err error
colIDs := coll.Idx2ColumnIDs[idxID]
var colID int64
if rangePosition >= len(colIDs) {
colID = -1
colUniqueIDs := coll.Idx2ColUniqueIDs[idxID]
var colUniqueID int64
if rangePosition >= len(colUniqueIDs) {
colUniqueID = -1
} else {
colID = colIDs[rangePosition]
colUniqueID = colUniqueIDs[rangePosition]
}
// prefer index stats over column stats
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 {
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
idxID := idxIDs[0]
count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
} else {
count, err = GetRowCountByColumnRanges(sctx, coll, colID, []*ranger.Range{&rang})
count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
}
if err != nil {
return 0, errors.Trace(err)
@@ -409,7 +409,7 @@ func expBackoffEstimation(sctx sessionctx.Context, idx *statistics.Index, coll *
Collators: make([]collate.Collator, 1),
},
}
colsIDs := coll.Idx2ColumnIDs[idx.Histogram.ID]
colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID]
singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
// The following code uses exponential backoff to reduce the impact of the independence assumption. It works like:
// 1. Calc the selectivity of each column.
@@ -434,7 +434,7 @@ func expBackoffEstimation(sctx sessionctx.Context, idx *statistics.Index, coll *
foundStats = true
count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan)
}
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
// Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call
// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
// check avoids infinite recursion.
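The comment in expBackoffEstimation above describes the combination rule only in prose: per-column selectivities are sorted so the most selective comes first and then multiplied with exponentially shrinking exponents, which damps the error of assuming the columns are independent. The sketch below illustrates that rule; the exact exponents (1, 1/2, 1/4, 1/8) and the four-factor cap are assumptions for illustration, not a verbatim copy of the function:

```go
package main

import (
	"fmt"
	"math"
	"sort"
)

// expBackoff combines per-column selectivities for a multi-column index range:
// sort ascending (most selective first) and apply exponents 1, 1/2, 1/4, 1/8
// to the first four factors. Exponents and cap are illustrative assumptions.
func expBackoff(sels []float64) float64 {
	sorted := append([]float64(nil), sels...)
	sort.Float64s(sorted)
	result := 1.0
	for i, s := range sorted {
		if i >= 4 {
			break
		}
		result *= math.Pow(s, 1.0/math.Pow(2, float64(i)))
	}
	return result
}

func main() {
	// Full independence would give 0.1*0.2*0.5 = 0.01; backoff is less aggressive.
	fmt.Printf("%.4f\n", expBackoff([]float64{0.5, 0.1, 0.2}))
	// 0.1 * 0.2^(1/2) * 0.5^(1/4) ≈ 0.0376
}
```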
25 changes: 22 additions & 3 deletions pkg/planner/cardinality/selectivity.go
@@ -157,7 +157,26 @@ func Selectivity(
slices.Sort(idxIDs)
for _, id := range idxIDs {
idxStats := coll.Indices[id]
<<<<<<< HEAD
idxCols := findPrefixOfIndexByCol(extractedCols, coll.Idx2ColumnIDs[id], id2Paths[idxStats.ID])
=======
idxInfo := idxStats.Info
if idxInfo.MVIndex {
totalSelectivity, mask, ok := getMaskAndSelectivityForMVIndex(ctx, coll, id, remainedExprs)
if !ok {
continue
}
nodes = append(nodes, &StatsNode{
Tp: IndexType,
ID: id,
mask: mask,
numCols: len(idxInfo.Columns),
Selectivity: totalSelectivity,
})
continue
}
idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColUniqueIDs[id], id2Paths[idxStats.ID])
>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208))
if len(idxCols) > 0 {
lengths := make([]int, 0, len(idxCols))
for i := 0; i < len(idxCols) && i < len(idxStats.Info.Columns); i++ {
@@ -777,7 +796,7 @@ func findAvailableStatsForCol(sctx sessionctx.Context, coll *statistics.HistColl
return false, uniqueID
}
// try to find available stats in single column index stats (except for prefix index)
for idxStatsIdx, cols := range coll.Idx2ColumnIDs {
for idxStatsIdx, cols := range coll.Idx2ColUniqueIDs {
if len(cols) == 1 && cols[0] == uniqueID {
idxStats, ok := coll.Indices[idxStatsIdx]
if ok &&
@@ -826,7 +845,7 @@ func getEqualCondSelectivity(sctx sessionctx.Context, coll *statistics.HistColl,
return outOfRangeEQSelectivity(sctx, idx.NDV, coll.RealtimeCount, int64(idx.TotalRowCount())), nil
}
// The equal condition only uses prefix columns of the index.
colIDs := coll.Idx2ColumnIDs[idx.ID]
colIDs := coll.Idx2ColUniqueIDs[idx.ID]
var ndv int64
for i, colID := range colIDs {
if i >= usedColsLen {
@@ -908,7 +927,7 @@ func crossValidationSelectivity(
}()
}
minRowCount = math.MaxFloat64
cols := coll.Idx2ColumnIDs[idx.ID]
cols := coll.Idx2ColUniqueIDs[idx.ID]
crossValidationSelectivity = 1.0
totalRowCount := idx.TotalRowCount()
for i, colID := range cols {
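Each StatsNode appended in Selectivity above records, in its mask field, which of the remaining filter expressions that statistics object can evaluate. The sketch below shows how such masks can be folded into an overall selectivity with a default for uncovered expressions; it is a deliberately simplified stand-in for the real greedy set-cover logic, and the default value of 0.8 is an arbitrary example:

```go
package main

import (
	"fmt"
	"math/bits"
)

// statsNode is a simplified stand-in for the planner's StatsNode: bit i of
// mask means this node's statistics can estimate filter expression i.
type statsNode struct {
	mask        uint64
	selectivity float64
}

// combineSelectivity multiplies the selectivities of the given nodes and
// applies a default selectivity for every expression no node covers.
func combineSelectivity(nodes []statsNode, exprCount int, defaultSel float64) float64 {
	covered := uint64(0)
	sel := 1.0
	for _, n := range nodes {
		sel *= n.selectivity
		covered |= n.mask
	}
	uncovered := exprCount - bits.OnesCount64(covered)
	for i := 0; i < uncovered; i++ {
		sel *= defaultSel
	}
	return sel
}

func main() {
	nodes := []statsNode{
		{mask: 0b011, selectivity: 0.05}, // an index covering expressions 0 and 1
		{mask: 0b100, selectivity: 0.30}, // a column covering expression 2
	}
	// Four expressions in total, so one stays uncovered and gets the default 0.8.
	fmt.Printf("%.4f\n", combineSelectivity(nodes, 4, 0.8)) // 0.0120
}
```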
4 changes: 2 additions & 2 deletions pkg/planner/cardinality/selectivity_test.go
@@ -893,8 +893,8 @@ func generateMapsForMockStatsTbl(statsTbl *statistics.Table) {
for _, idxIDs := range colID2IdxIDs {
slices.Sort(idxIDs)
}
statsTbl.Idx2ColumnIDs = idx2Columns
statsTbl.ColID2IdxIDs = colID2IdxIDs
statsTbl.Idx2ColUniqueIDs = idx2Columns
statsTbl.ColUniqueID2IdxIDs = colID2IdxIDs
}

func TestIssue39593(t *testing.T) {
2 changes: 1 addition & 1 deletion pkg/planner/core/casetest/planstats/BUILD.bazel
@@ -9,7 +9,7 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 4,
shard_count = 5,
deps = [
"//pkg/config",
"//pkg/domain",
45 changes: 45 additions & 0 deletions pkg/planner/core/casetest/planstats/plan_stats_test.go
@@ -400,3 +400,48 @@ func TestCollectDependingVirtualCols(t *testing.T) {
require.Equal(t, output[i].OutputColNames, cols)
}
}

func TestPartialStatsInExplain(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")
tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)")
tk.MustExec("create table t2(a int, primary key(a))")
tk.MustExec("insert into t2 values (1),(2),(3)")
tk.MustExec(
"create table tp(a int, b int, c int, index ic(c)) partition by range(a)" +
"(partition p0 values less than (10)," +
"partition p1 values less than (20)," +
"partition p2 values less than maxvalue)",
)
tk.MustExec("insert into tp values (1,1,1),(2,2,2),(13,13,13),(14,14,14),(25,25,25),(36,36,36)")

oriLease := dom.StatsHandle().Lease()
dom.StatsHandle().SetLease(1)
defer func() {
dom.StatsHandle().SetLease(oriLease)
}()
tk.MustExec("analyze table t")
tk.MustExec("analyze table t2")
tk.MustExec("analyze table tp")
tk.RequireNoError(dom.StatsHandle().Update(dom.InfoSchema()))
tk.MustQuery("explain select * from tp where a = 1")
tk.MustExec("set @@tidb_stats_load_sync_wait = 0")
var (
input []string
output []struct {
Query string
Result []string
}
)
testData := GetPlanStatsData()
testData.LoadTestCases(t, &input, &output)
for i, sql := range input {
testdata.OnRecord(func() {
output[i].Query = input[i]
output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows())
})
tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...))
}
}
@@ -62,5 +62,13 @@
]
}
]
},
{
"name": "TestPartialStatsInExplain",
"cases": [
"explain format = brief select * from tp where b = 10",
"explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c"
]
}
]
@@ -101,5 +101,47 @@
]
}
]
},
{
"Name": "TestPartialStatsInExplain",
"Cases": [
{
"Query": "explain format = brief select * from tp where b = 10",
"Result": [
"TableReader 0.01 root partition:all data:Selection",
"└─Selection 0.01 cop[tikv] eq(test.tp.b, 10)",
" └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[b:allEvicted]"
]
},
{
"Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"Result": [
"Projection 0.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c",
"└─HashJoin 0.00 root inner join, equal:[eq(test.tp.c, test.t.b)]",
" ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
" │ └─Selection 0.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))",
" │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
" └─TableReader(Probe) 3.00 root data:Selection",
" └─Selection 3.00 cop[tikv] not(isnull(test.t.b))",
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
]
},
{
"Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
"Result": [
"HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]",
"├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
"│ ├─TableReader(Build) 0.33 root data:Selection",
"│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))",
"│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
"│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
"│ ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))",
"│ │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
"│ └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
"└─TableReader(Probe) 1.00 root data:TableRangeScan",
" └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
]
}
]
}
]
2 changes: 1 addition & 1 deletion pkg/planner/core/exhaust_physical_plans.go
@@ -1211,7 +1211,7 @@ func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.Hist
// 2. Try to get NDV from index stats.
// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
for idxID, idxCols := range histColl.Idx2ColumnIDs {
for idxID, idxCols := range histColl.Idx2ColUniqueIDs {
if len(idxCols) != len(colUIDs) {
continue
}
4 changes: 2 additions & 2 deletions pkg/planner/core/logical_plans.go
@@ -1830,8 +1830,8 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
path.IdxCols = append(path.IdxCols, handleCol)
path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength)
// Also updates the map that maps the index id to its prefix column ids.
if len(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID]) == len(path.Index.Columns) {
ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID], handleCol.UniqueID)
if len(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) {
ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID)
}
}
}
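fillIndexPath above appends the handle column's unique ID to a non-unique index's recorded column list, and the length check makes the append happen at most once. A hedged sketch of that idempotent append with simplified stand-in types:

```go
package main

import "fmt"

// appendHandleOnce mirrors the guard in fillIndexPath: the handle column's
// unique ID is appended to the index's recorded column-ID prefix only if that
// prefix still has exactly as many entries as the index definition, so calling
// it again is a no-op. Types and names here are simplified stand-ins.
func appendHandleOnce(idx2ColUniqueIDs map[int64][]int64, idxID int64, idxDefLen int, handleUniqueID int64) {
	if len(idx2ColUniqueIDs[idxID]) == idxDefLen {
		idx2ColUniqueIDs[idxID] = append(idx2ColUniqueIDs[idxID], handleUniqueID)
	}
}

func main() {
	m := map[int64][]int64{5: {101, 102}} // index 5 is defined on two columns
	appendHandleOnce(m, 5, 2, 999)
	appendHandleOnce(m, 5, 2, 999) // second call changes nothing
	fmt.Println(m[5])              // [101 102 999]
}
```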
6 changes: 3 additions & 3 deletions pkg/planner/core/stats.go
@@ -175,8 +175,8 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property.
tbl := ds.tableStats.HistColl
ndvs := make([]property.GroupNDV, 0, len(colGroups))
for idxID, idx := range tbl.Indices {
colsLen := len(tbl.Idx2ColumnIDs[idxID])
// tbl.Idx2ColumnIDs may only contain the prefix of index columns.
colsLen := len(tbl.Idx2ColUniqueIDs[idxID])
// tbl.Idx2ColUniqueIDs may only contain the prefix of index columns.
// But it may exceed the total number of index columns, since the index would contain the handle column if it's not a unique index.
// We append the handle at fillIndexPath.
if colsLen < len(idx.Info.Columns) {
Expand All @@ -185,7 +185,7 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property.
colsLen--
}
idxCols := make([]int64, colsLen)
copy(idxCols, tbl.Idx2ColumnIDs[idxID])
copy(idxCols, tbl.Idx2ColUniqueIDs[idxID])
slices.Sort(idxCols)
for _, g := range colGroups {
// We only want those exact matches.
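getGroupNDVs above only reuses an index NDV for a column group when the index's recorded column set matches the group exactly. A self-contained sketch of that exact-match rule with simplified types (the map-based layout below is an assumption for illustration):

```go
package main

import (
	"fmt"
	"slices"
)

// groupNDVFromIndexes returns the NDV of an index whose column set exactly
// matches the requested group of column unique IDs, or 0 if no index matches.
// It mirrors the "exact match" rule in getGroupNDVs with simplified types.
func groupNDVFromIndexes(group []int64, idxCols map[int64][]int64, idxNDV map[int64]int64) int64 {
	want := append([]int64(nil), group...)
	slices.Sort(want)
	for idxID, cols := range idxCols {
		have := append([]int64(nil), cols...)
		slices.Sort(have)
		if slices.Equal(want, have) {
			return idxNDV[idxID]
		}
	}
	return 0
}

func main() {
	idxCols := map[int64][]int64{1: {10, 11}, 2: {12}}
	idxNDV := map[int64]int64{1: 42, 2: 7}
	fmt.Println(groupNDVFromIndexes([]int64{11, 10}, idxCols, idxNDV)) // 42
	fmt.Println(groupNDVFromIndexes([]int64{10}, idxCols, idxNDV))     // 0: no exact match
}
```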