Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner, statistics: use the correct column ID when recording stats loading status #52208

Merged
merged 13 commits into from
Apr 2, 2024
Merged
12 changes: 10 additions & 2 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistCo
}
sc := sctx.GetSessionVars().StmtCtx
c, ok := coll.Columns[colID]
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
colInfoID := colID
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

if len(coll.UniqueID2colInfoID) > 0 {
colInfoID = coll.UniqueID2colInfoID[colID]
}
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should distinguish colID and colUniqueID in the EXPLAIN output, for example "colID: xxx" and "colUniqueID: xxx".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want the column name in the EXPLAIN result, which should be fetched using the column ID from the metadata instead of the UniqueID.

if c != nil && c.Info != nil {
name = c.Info.Name.O
}
Expand Down Expand Up @@ -83,7 +87,11 @@ func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.His
}
sc := sctx.GetSessionVars().StmtCtx
c, ok := coll.Columns[colID]
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
colInfoID := colID
if len(coll.UniqueID2colInfoID) > 0 {
colInfoID = coll.UniqueID2colInfoID[colID]
}
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
if c != nil && c.Info != nil {
name = c.Info.Name.O
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/casetest/planstats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 4,
shard_count = 5,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
45 changes: 45 additions & 0 deletions pkg/planner/core/casetest/planstats/plan_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,3 +405,48 @@ func TestCollectDependingVirtualCols(t *testing.T) {
require.Equal(t, output[i].OutputColNames, cols)
}
}

// TestPartialStatsInExplain runs the EXPLAIN statements listed for this test in
// the plan-stats test data suite and compares every result with the recorded
// output. The queries touch two plain tables (t, t2) and one range-partitioned
// table (tp), all of which are analyzed beforehand.
func TestPartialStatsInExplain(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// Build the schema and load a few rows into every table.
	setupStmts := []string{
		"use test",
		"create table t(a int, b int, c int, primary key(a), key idx(b))",
		"insert into t values (1,1,1),(2,2,2),(3,3,3)",
		"create table t2(a int, primary key(a))",
		"insert into t2 values (1),(2),(3)",
		"create table tp(a int, b int, c int, index ic(c)) partition by range(a)" +
			"(partition p0 values less than (10)," +
			"partition p1 values less than (20)," +
			"partition p2 values less than maxvalue)",
		"insert into tp values (1,1,1),(2,2,2),(13,13,13),(14,14,14),(25,25,25),(36,36,36)",
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	// Override the stats lease for the duration of the test and restore the
	// previous value on exit.
	savedLease := dom.StatsHandle().Lease()
	dom.StatsHandle().SetLease(1)
	defer func() {
		dom.StatsHandle().SetLease(savedLease)
	}()

	for _, tbl := range []string{"t", "t2", "tp"} {
		tk.MustExec("analyze table " + tbl)
	}
	tk.RequireNoError(dom.StatsHandle().Update(dom.InfoSchema()))
	// The result of this query is intentionally ignored.
	// NOTE(review): presumably it only serves to touch tp's stats before the
	// recorded cases run — confirm.
	tk.MustQuery("explain select * from tp where a = 1")
	tk.MustExec("set @@tidb_stats_load_sync_wait = 0")

	// planCase mirrors one recorded entry of the expected-output file.
	type planCase struct {
		Query  string
		Result []string
	}
	var input []string
	var output []planCase

	suite := GetPlanStatsData()
	suite.LoadTestCases(t, &input, &output)
	for i := range input {
		sql := input[i]
		// In record mode, refresh the expected output from the current plan.
		testdata.OnRecord(func() {
			output[i].Query = sql
			output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows())
		})
		tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...))
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,13 @@
]
}
]
},
{
"name": "TestPartialStatsInExplain",
"cases": [
"explain format = brief select * from tp where b = 10",
"explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c"
]
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -101,5 +101,47 @@
]
}
]
},
{
"Name": "TestPartialStatsInExplain",
"Cases": [
{
"Query": "explain format = brief select * from tp where b = 10",
"Result": [
"TableReader 0.01 root partition:all data:Selection",
"└─Selection 0.01 cop[tikv] eq(test.tp.b, 10)",
" └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[b:allEvicted]"
]
},
{
"Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"Result": [
"Projection 0.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c",
"└─HashJoin 0.00 root inner join, equal:[eq(test.tp.c, test.t.b)]",
" ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
" │ └─Selection 0.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))",
" │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
" └─TableReader(Probe) 3.00 root data:Selection",
" └─Selection 3.00 cop[tikv] not(isnull(test.t.b))",
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
]
},
{
"Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
"Result": [
"HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]",
"├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
"│ ├─TableReader(Build) 0.33 root data:Selection",
"│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))",
"│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
"│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
"│ ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))",
"│ │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
"│ └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
"└─TableReader(Probe) 1.00 root data:TableRangeScan",
" └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
]
}
]
}
]
31 changes: 19 additions & 12 deletions pkg/statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,16 @@ const (

// HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity.
type HistColl struct {
// Note that Column use UniqueID as the key while Indices use the index ID in the metadata.
Columns map[int64]*Column
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Big thanks for refactoring this structure!

Indices map[int64]*Index
// Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner.
// Idx2ColumnIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner.
Idx2ColumnIDs map[int64][]int64
winoros marked this conversation as resolved.
Show resolved Hide resolved
// ColID2IdxIDs maps the column id to a list index ids whose first column is it. It's used to calculate the selectivity in planner.
// ColID2IdxIDs maps the column UniqueID to a list of index ids whose first column is it.
// It's used to calculate the selectivity in planner.
ColID2IdxIDs map[int64][]int64
// UniqueID2colInfoID maps the column UniqueID to its ID in the metadata.
UniqueID2colInfoID map[int64]int64
// MVIdx2Columns maps the index id to its columns by expression.Column.
// For normal index, the column id is enough, as we already have in Idx2ColumnIDs. But currently, mv index needs more
// information to match the filter against the mv index columns, and we need this map to provide this information.
Expand Down Expand Up @@ -804,9 +808,11 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl {
func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, columns []*expression.Column) *HistColl {
newColHistMap := make(map[int64]*Column)
colInfoID2UniqueID := make(map[int64]int64, len(columns))
uniqueID2colInfoID := make(map[int64]int64, len(columns))
idxID2idxInfo := make(map[int64]*model.IndexInfo)
for _, col := range columns {
colInfoID2UniqueID[col.ID] = col.UniqueID
uniqueID2colInfoID[col.UniqueID] = col.ID
}
for id, colHist := range coll.Columns {
uniqueID, ok := colInfoID2UniqueID[id]
Expand Down Expand Up @@ -853,16 +859,17 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, c
slices.Sort(idxIDs)
}
newColl := &HistColl{
PhysicalID: coll.PhysicalID,
HavePhysicalID: coll.HavePhysicalID,
Pseudo: coll.Pseudo,
RealtimeCount: coll.RealtimeCount,
ModifyCount: coll.ModifyCount,
Columns: newColHistMap,
Indices: newIdxHistMap,
ColID2IdxIDs: colID2IdxIDs,
Idx2ColumnIDs: idx2Columns,
MVIdx2Columns: mvIdx2Columns,
PhysicalID: coll.PhysicalID,
HavePhysicalID: coll.HavePhysicalID,
Pseudo: coll.Pseudo,
RealtimeCount: coll.RealtimeCount,
ModifyCount: coll.ModifyCount,
Columns: newColHistMap,
Indices: newIdxHistMap,
ColID2IdxIDs: colID2IdxIDs,
Idx2ColumnIDs: idx2Columns,
UniqueID2colInfoID: uniqueID2colInfoID,
MVIdx2Columns: mvIdx2Columns,
}
return newColl
}
Expand Down