From c35daf54d2db6d4a6f3534c0f3bbdd2e1aeddd84 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Thu, 28 Mar 2024 19:40:27 +0800 Subject: [PATCH 1/9] fix --- pkg/planner/cardinality/row_count_column.go | 12 ++++- .../casetest/planstats/plan_stats_test.go | 45 +++++++++++++++++++ .../testdata/plan_stats_suite_in.json | 8 ++++ .../testdata/plan_stats_suite_out.json | 42 +++++++++++++++++ pkg/statistics/table.go | 4 ++ 5 files changed, 109 insertions(+), 2 deletions(-) diff --git a/pkg/planner/cardinality/row_count_column.go b/pkg/planner/cardinality/row_count_column.go index 6f11a95681860..4373cc86e41c6 100644 --- a/pkg/planner/cardinality/row_count_column.go +++ b/pkg/planner/cardinality/row_count_column.go @@ -45,7 +45,11 @@ func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistCo } sc := sctx.GetSessionVars().StmtCtx c, ok := coll.Columns[colID] - recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID) + colInfoID := colID + if len(coll.UniqueID2colInfoID) > 0 { + colInfoID = coll.UniqueID2colInfoID[colID] + } + recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) if c != nil && c.Info != nil { name = c.Info.Name.O } @@ -83,7 +87,11 @@ func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.His } sc := sctx.GetSessionVars().StmtCtx c, ok := coll.Columns[colID] - recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID) + colInfoID := colID + if len(coll.UniqueID2colInfoID) > 0 { + colInfoID = coll.UniqueID2colInfoID[colID] + } + recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) if c != nil && c.Info != nil { name = c.Info.Name.O } diff --git a/pkg/planner/core/casetest/planstats/plan_stats_test.go b/pkg/planner/core/casetest/planstats/plan_stats_test.go index 5337b1460b224..5727858447e77 100644 --- a/pkg/planner/core/casetest/planstats/plan_stats_test.go +++ b/pkg/planner/core/casetest/planstats/plan_stats_test.go @@ -405,3 +405,48 @@ func TestCollectDependingVirtualCols(t *testing.T) { require.Equal(t, output[i].OutputColNames, cols) } } + +func TestPartialStatsInExplain(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))") + tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)") + tk.MustExec("create table t2(a int, primary key(a))") + tk.MustExec("insert into t2 values (1),(2),(3)") + tk.MustExec( + "create table tp(a int, b int, c int, index ic(c)) partition by range(a)" + + "(partition p0 values less than (10)," + + "partition p1 values less than (20)," + + "partition p2 values less than maxvalue)", + ) + tk.MustExec("insert into tp values (1,1,1),(2,2,2),(13,13,13),(14,14,14),(25,25,25),(36,36,36)") + + oriLease := dom.StatsHandle().Lease() + dom.StatsHandle().SetLease(1) + defer func() { + dom.StatsHandle().SetLease(oriLease) + }() + tk.MustExec("analyze table t") + tk.MustExec("analyze table t2") + tk.MustExec("analyze table tp") + tk.RequireNoError(dom.StatsHandle().Update(dom.InfoSchema())) + tk.MustQuery("explain select * from tp where a = 1") + tk.MustExec("set @@tidb_stats_load_sync_wait = 0") + var ( + input []string + output []struct { + Query string + Result []string + } + ) + testData := GetPlanStatsData() + testData.LoadTestCases(t, &input, &output) + for i, sql := range input { + testdata.OnRecord(func() { + output[i].Query = input[i] + output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows()) + }) + tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...)) + } +} diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json index c9496da4c8a0a..8f209105e6965 100644 --- a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json +++ b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json @@ -62,5 +62,13 @@ ] } ] + }, + { + "name": "TestPartialStatsInExplain", + "cases": [ + "explain format = brief select * from tp where b = 10", + "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c", + "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c" + ] } ] diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json index 93552f8bed709..92f1647bb88d7 100644 --- a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json +++ b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json @@ -101,5 +101,47 @@ ] } ] + }, + { + "Name": "TestPartialStatsInExplain", + "Cases": [ + { + "Query": "explain format = brief select * from tp where b = 10", + "Result": [ + "TableReader 0.01 root partition:all data:Selection", + "└─Selection 0.01 cop[tikv] eq(test.tp.b, 10)", + " └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[b:allEvicted]" + ] + }, + { + "Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c", + "Result": [ + "Projection 0.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c", + "└─HashJoin 0.00 root inner join, equal:[eq(test.tp.c, test.t.b)]", + " ├─TableReader(Build) 0.00 root partition:p1 data:Selection", + " │ └─Selection 0.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))", + " │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]", + " └─TableReader(Probe) 3.00 root data:Selection", + " └─Selection 3.00 cop[tikv] not(isnull(test.t.b))", + " └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]" + ] + }, + { + "Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c", + "Result": [ + "HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]", + "├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)", + "│ ├─TableReader(Build) 0.33 root data:Selection", + "│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))", + "│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]", + "│ └─IndexLookUp(Probe) 0.33 root partition:p0 ", + "│ ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))", + "│ │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]", + "│ └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]", + "└─TableReader(Probe) 1.00 root data:TableRangeScan", + " └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]" + ] + } + ] } ] diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index a7be276c83ca1..76e710f0ba7ba 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -221,6 +221,7 @@ type HistColl struct { Idx2ColumnIDs map[int64][]int64 // ColID2IdxIDs maps the column id to a list index ids whose first column is it. It's used to calculate the selectivity in planner. ColID2IdxIDs map[int64][]int64 + UniqueID2colInfoID map[int64]int64 // MVIdx2Columns maps the index id to its columns by expression.Column. // For normal index, the column id is enough, as we already have in Idx2ColumnIDs. But currently, mv index needs more // information to match the filter against the mv index columns, and we need this map to provide this information. @@ -753,9 +754,11 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl { func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, columns []*expression.Column) *HistColl { newColHistMap := make(map[int64]*Column) colInfoID2UniqueID := make(map[int64]int64, len(columns)) + uniqueID2colInfoID := make(map[int64]int64, len(columns)) idxID2idxInfo := make(map[int64]*model.IndexInfo) for _, col := range columns { colInfoID2UniqueID[col.ID] = col.UniqueID + uniqueID2colInfoID[col.UniqueID] = col.ID } for id, colHist := range coll.Columns { uniqueID, ok := colInfoID2UniqueID[id] @@ -811,6 +814,7 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, c Indices: newIdxHistMap, ColID2IdxIDs: colID2IdxIDs, Idx2ColumnIDs: idx2Columns, + UniqueID2colInfoID: uniqueID2colInfoID, MVIdx2Columns: mvIdx2Columns, } return newColl From e60c75229d23e3e8ff066051b4b7c293dd567538 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Thu, 28 Mar 2024 19:47:56 +0800 Subject: [PATCH 2/9] add comments and update bazel --- pkg/planner/core/casetest/planstats/BUILD.bazel | 2 +- pkg/statistics/table.go | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/planner/core/casetest/planstats/BUILD.bazel b/pkg/planner/core/casetest/planstats/BUILD.bazel index dab3681d43653..b8753e55159b9 100644 --- a/pkg/planner/core/casetest/planstats/BUILD.bazel +++ b/pkg/planner/core/casetest/planstats/BUILD.bazel @@ -9,7 +9,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 4, + shard_count = 5, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 76e710f0ba7ba..85c3f3321f8c7 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -215,12 +215,15 @@ const ( // HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity. type HistColl struct { + // Note that Column use UniqueID as the key while Indices use the index ID in the metadata. Columns map[int64]*Column Indices map[int64]*Index - // Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner. + // Idx2ColumnIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner. Idx2ColumnIDs map[int64][]int64 - // ColID2IdxIDs maps the column id to a list index ids whose first column is it. It's used to calculate the selectivity in planner. + // ColID2IdxIDs maps the column UniqueID to a list index ids whose first column is it. + // It's used to calculate the selectivity in planner. ColID2IdxIDs map[int64][]int64 + // UniqueID2colInfoID maps the column UniqueID to its ID in the metadata. UniqueID2colInfoID map[int64]int64 // MVIdx2Columns maps the index id to its columns by expression.Column. // For normal index, the column id is enough, as we already have in Idx2ColumnIDs. But currently, mv index needs more From 33291ed9a0ea723a4c5ad64f60393e15034a0c48 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Thu, 28 Mar 2024 19:56:17 +0800 Subject: [PATCH 3/9] fix fmt --- pkg/statistics/table.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 00529dc9fc7d2..7df0c9b91601c 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -859,17 +859,17 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, c slices.Sort(idxIDs) } newColl := &HistColl{ - PhysicalID: coll.PhysicalID, - HavePhysicalID: coll.HavePhysicalID, - Pseudo: coll.Pseudo, - RealtimeCount: coll.RealtimeCount, - ModifyCount: coll.ModifyCount, - Columns: newColHistMap, - Indices: newIdxHistMap, - ColID2IdxIDs: colID2IdxIDs, - Idx2ColumnIDs: idx2Columns, + PhysicalID: coll.PhysicalID, + HavePhysicalID: coll.HavePhysicalID, + Pseudo: coll.Pseudo, + RealtimeCount: coll.RealtimeCount, + ModifyCount: coll.ModifyCount, + Columns: newColHistMap, + Indices: newIdxHistMap, + ColID2IdxIDs: colID2IdxIDs, + Idx2ColumnIDs: idx2Columns, UniqueID2colInfoID: uniqueID2colInfoID, - MVIdx2Columns: mvIdx2Columns, + MVIdx2Columns: mvIdx2Columns, } return newColl } From a44e2b258d83ab7ff0486a9a3faf48549c4f3386 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Fri, 29 Mar 2024 16:44:23 +0800 Subject: [PATCH 4/9] rename the fields as the comments suggest --- pkg/planner/cardinality/cross_estimation.go | 2 +- pkg/planner/cardinality/row_count_index.go | 8 ++++---- pkg/planner/cardinality/selectivity.go | 8 ++++---- pkg/planner/cardinality/selectivity_test.go | 4 ++-- pkg/planner/core/exhaust_physical_plans.go | 2 +- pkg/planner/core/logical_plans.go | 4 ++-- pkg/planner/core/stats.go | 6 +++--- pkg/statistics/table.go | 16 ++++++++-------- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pkg/planner/cardinality/cross_estimation.go b/pkg/planner/cardinality/cross_estimation.go index 59028f24e82b8..df6052d2b2198 100644 --- a/pkg/planner/cardinality/cross_estimation.go +++ b/pkg/planner/cardinality/cross_estimation.go @@ -152,7 +152,7 @@ func crossEstimateRowCount(sctx context.PlanContext, return 0, err == nil, corr } idxID := int64(-1) - idxIDs, idxExists := dsStatsInfo.HistColl.ColID2IdxIDs[colID] + idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colID] if idxExists && len(idxIDs) > 0 { idxID = idxIDs[0] } diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go index ab70ca6178f20..4b346bcb231f7 100644 --- a/pkg/planner/cardinality/row_count_index.go +++ b/pkg/planner/cardinality/row_count_index.go @@ -170,7 +170,7 @@ func getIndexRowCountForStatsV1(sctx context.PlanContext, coll *statistics.HistC } var count float64 var err error - colIDs := coll.Idx2ColumnIDs[idxID] + colIDs := coll.Idx2ColUniqueIDs[idxID] var colID int64 if rangePosition >= len(colIDs) { colID = -1 @@ -178,7 +178,7 @@ func getIndexRowCountForStatsV1(sctx context.PlanContext, coll *statistics.HistC colID = colIDs[rangePosition] } // prefer index stats over column stats - if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 { + if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && len(idxIDs) > 0 { idxID := idxIDs[0] count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}) } else { @@ -422,7 +422,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll Collators: make([]collate.Collator, 1), }, } - colsIDs := coll.Idx2ColumnIDs[idx.Histogram.ID] + colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID] singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal)) // The following codes uses Exponential Backoff to reduce the impact of independent assumption. It works like: // 1. Calc the selectivity of each column. @@ -449,7 +449,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan) selectivity = count / float64(coll.RealtimeCount) } - if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 { + if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 { // Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call // `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This // check avoids infinite recursion. diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go index c4c65788facb2..bc08a27d4f61f 100644 --- a/pkg/planner/cardinality/selectivity.go +++ b/pkg/planner/cardinality/selectivity.go @@ -182,7 +182,7 @@ func Selectivity( }) continue } - idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColumnIDs[id], id2Paths[idxStats.ID]) + idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColUniqueIDs[id], id2Paths[idxStats.ID]) if len(idxCols) > 0 { lengths := make([]int, 0, len(idxCols)) for i := 0; i < len(idxCols) && i < len(idxStats.Info.Columns); i++ { @@ -919,7 +919,7 @@ func findAvailableStatsForCol(sctx context.PlanContext, coll *statistics.HistCol return false, uniqueID } // try to find available stats in single column index stats (except for prefix index) - for idxStatsIdx, cols := range coll.Idx2ColumnIDs { + for idxStatsIdx, cols := range coll.Idx2ColUniqueIDs { if len(cols) == 1 && cols[0] == uniqueID { idxStats := coll.Indices[idxStatsIdx] if !statistics.IndexStatsIsInvalid(sctx, idxStats, coll, idxStatsIdx) && @@ -968,7 +968,7 @@ func getEqualCondSelectivity(sctx context.PlanContext, coll *statistics.HistColl return outOfRangeEQSelectivity(sctx, idx.NDV, realtimeCnt, int64(idx.TotalRowCount())), nil } // The equal condition only uses prefix columns of the index. - colIDs := coll.Idx2ColumnIDs[idx.ID] + colIDs := coll.Idx2ColUniqueIDs[idx.ID] var ndv int64 for i, colID := range colIDs { if i >= usedColsLen { @@ -1050,7 +1050,7 @@ func crossValidationSelectivity( }() } minRowCount = math.MaxFloat64 - cols := coll.Idx2ColumnIDs[idx.ID] + cols := coll.Idx2ColUniqueIDs[idx.ID] crossValidationSelectivity = 1.0 totalRowCount := idx.TotalRowCount() for i, colID := range cols { diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go index c41a956abfe83..b9503ef045462 100644 --- a/pkg/planner/cardinality/selectivity_test.go +++ b/pkg/planner/cardinality/selectivity_test.go @@ -893,8 +893,8 @@ func generateMapsForMockStatsTbl(statsTbl *statistics.Table) { for _, idxIDs := range colID2IdxIDs { slices.Sort(idxIDs) } - statsTbl.Idx2ColumnIDs = idx2Columns - statsTbl.ColID2IdxIDs = colID2IdxIDs + statsTbl.Idx2ColUniqueIDs = idx2Columns + statsTbl.ColUniqueID2IdxIDs = colID2IdxIDs } func TestIssue39593(t *testing.T) { diff --git a/pkg/planner/core/exhaust_physical_plans.go b/pkg/planner/core/exhaust_physical_plans.go index 9c4cedf0901a0..59effe27bf047 100644 --- a/pkg/planner/core/exhaust_physical_plans.go +++ b/pkg/planner/core/exhaust_physical_plans.go @@ -1166,7 +1166,7 @@ func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.Hist // 2. Try to get NDV from index stats. // Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is // equal or less than the corresponding normal index, and that's safe here since we want a lower bound. - for idxID, idxCols := range histColl.Idx2ColumnIDs { + for idxID, idxCols := range histColl.Idx2ColUniqueIDs { if len(idxCols) != len(colUIDs) { continue } diff --git a/pkg/planner/core/logical_plans.go b/pkg/planner/core/logical_plans.go index 673c078b7eb77..413261a2e9e0d 100644 --- a/pkg/planner/core/logical_plans.go +++ b/pkg/planner/core/logical_plans.go @@ -1798,8 +1798,8 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex path.IdxCols = append(path.IdxCols, handleCol) path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength) // Also updates the map that maps the index id to its prefix column ids. - if len(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID]) == len(path.Index.Columns) { - ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID], handleCol.UniqueID) + if len(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) { + ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID) } } } diff --git a/pkg/planner/core/stats.go b/pkg/planner/core/stats.go index 6e08615b7d787..b59af3cb52a5c 100644 --- a/pkg/planner/core/stats.go +++ b/pkg/planner/core/stats.go @@ -176,8 +176,8 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property. tbl := ds.tableStats.HistColl ndvs := make([]property.GroupNDV, 0, len(colGroups)) for idxID, idx := range tbl.Indices { - colsLen := len(tbl.Idx2ColumnIDs[idxID]) - // tbl.Idx2ColumnIDs may only contain the prefix of index columns. + colsLen := len(tbl.Idx2ColUniqueIDs[idxID]) + // tbl.Idx2ColUniqueIDs may only contain the prefix of index columns. // But it may exceeds the total index since the index would contain the handle column if it's not a unique index. // We append the handle at fillIndexPath. if colsLen < len(idx.Info.Columns) { @@ -186,7 +186,7 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property. colsLen-- } idxCols := make([]int64, colsLen) - copy(idxCols, tbl.Idx2ColumnIDs[idxID]) + copy(idxCols, tbl.Idx2ColUniqueIDs[idxID]) slices.Sort(idxCols) for _, g := range colGroups { // We only want those exact matches. diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 7df0c9b91601c..30600fa9c9b3d 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -218,15 +218,15 @@ type HistColl struct { // Note that Column use UniqueID as the key while Indices use the index ID in the metadata. Columns map[int64]*Column Indices map[int64]*Index - // Idx2ColumnIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner. - Idx2ColumnIDs map[int64][]int64 - // ColID2IdxIDs maps the column UniqueID to a list index ids whose first column is it. + // Idx2ColUniqueIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner. + Idx2ColUniqueIDs map[int64][]int64 + // ColUniqueID2IdxIDs maps the column UniqueID to a list index ids whose first column is it. // It's used to calculate the selectivity in planner. - ColID2IdxIDs map[int64][]int64 + ColUniqueID2IdxIDs map[int64][]int64 // UniqueID2colInfoID maps the column UniqueID to its ID in the metadata. UniqueID2colInfoID map[int64]int64 // MVIdx2Columns maps the index id to its columns by expression.Column. - // For normal index, the column id is enough, as we already have in Idx2ColumnIDs. But currently, mv index needs more + // For normal index, the column id is enough, as we already have in Idx2ColUniqueIDs. But currently, mv index needs more // information to match the filter against the mv index columns, and we need this map to provide this information. MVIdx2Columns map[int64][]*expression.Column PhysicalID int64 @@ -804,7 +804,7 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl { return newColl } -// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxIDs and IdxID2ColIDs is built from the given parameter. +// GenerateHistCollFromColumnInfo generates a new HistColl whose ColUniqueID2IdxIDs and Idx2ColUniqueIDs is built from the given parameter. func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, columns []*expression.Column) *HistColl { newColHistMap := make(map[int64]*Column) colInfoID2UniqueID := make(map[int64]int64, len(columns)) @@ -866,8 +866,8 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, c ModifyCount: coll.ModifyCount, Columns: newColHistMap, Indices: newIdxHistMap, - ColID2IdxIDs: colID2IdxIDs, - Idx2ColumnIDs: idx2Columns, + ColUniqueID2IdxIDs: colID2IdxIDs, + Idx2ColUniqueIDs: idx2Columns, UniqueID2colInfoID: uniqueID2colInfoID, MVIdx2Columns: mvIdx2Columns, } From 51918dfbcc7ff5070a171233b50cfaa5c645835a Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Fri, 29 Mar 2024 19:51:41 +0800 Subject: [PATCH 5/9] update comments --- pkg/statistics/table.go | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 30600fa9c9b3d..d2e9079731b9a 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -215,9 +215,27 @@ const ( // HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity. type HistColl struct { - // Note that Column use UniqueID as the key while Indices use the index ID in the metadata. - Columns map[int64]*Column - Indices map[int64]*Index + // Note that when used in a query, Column use UniqueID as the key while Indices use the index ID in the + // metadata. (See GenerateHistCollFromColumnInfo() for details) + Columns map[int64]*Column + Indices map[int64]*Index + PhysicalID int64 + // TODO: add AnalyzeCount here + RealtimeCount int64 // RealtimeCount is the current table row count, maintained by applying stats delta based on AnalyzeCount. + ModifyCount int64 // Total modify count in a table. + + // The version of the statistics, refer to Version0, Version1, Version2 and so on. + StatsVer int + // HavePhysicalID is true means this HistColl is from single table and have its ID's information. + // The physical id is used when try to load column stats from storage. + HavePhysicalID bool + Pseudo bool + + /* + Fields below are only used in a query, like for estimation, and they will be useless when stored in + the stats cache. (See GenerateHistCollFromColumnInfo() for details) + */ + // Idx2ColUniqueIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner. Idx2ColUniqueIDs map[int64][]int64 // ColUniqueID2IdxIDs maps the column UniqueID to a list index ids whose first column is it. @@ -228,18 +246,7 @@ type HistColl struct { // MVIdx2Columns maps the index id to its columns by expression.Column. // For normal index, the column id is enough, as we already have in Idx2ColUniqueIDs. But currently, mv index needs more // information to match the filter against the mv index columns, and we need this map to provide this information. - MVIdx2Columns map[int64][]*expression.Column - PhysicalID int64 - // TODO: add AnalyzeCount here - RealtimeCount int64 // RealtimeCount is the current table row count, maintained by applying stats delta based on AnalyzeCount. - ModifyCount int64 // Total modify count in a table. - - // The version of the statistics, refer to Version0, Version1, Version2 and so on. - StatsVer int - // HavePhysicalID is true means this HistColl is from single table and have its ID's information. - // The physical id is used when try to load column stats from storage. - HavePhysicalID bool - Pseudo bool + MVIdx2Columns map[int64][]*expression.Column CanNotTriggerLoad bool } From 9808c887b49eb6ac5f5c1b99fb8ca8436ec7b253 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Fri, 29 Mar 2024 20:35:53 +0800 Subject: [PATCH 6/9] fix lint --- pkg/statistics/table.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index d2e9079731b9a..1938508462b6b 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -236,6 +236,7 @@ type HistColl struct { the stats cache. (See GenerateHistCollFromColumnInfo() for details) */ + CanNotTriggerLoad bool // Idx2ColUniqueIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner. Idx2ColUniqueIDs map[int64][]int64 // ColUniqueID2IdxIDs maps the column UniqueID to a list index ids whose first column is it. @@ -247,7 +248,6 @@ type HistColl struct { // For normal index, the column id is enough, as we already have in Idx2ColUniqueIDs. But currently, mv index needs more // information to match the filter against the mv index columns, and we need this map to provide this information. MVIdx2Columns map[int64][]*expression.Column - CanNotTriggerLoad bool } // TableMemoryUsage records tbl memory usage From a320f42881b46d3ff3f2f07ccfc410e313152172 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Fri, 29 Mar 2024 20:39:44 +0800 Subject: [PATCH 7/9] fix lint --- pkg/statistics/table.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 1938508462b6b..b4dba2732414b 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -247,7 +247,7 @@ type HistColl struct { // MVIdx2Columns maps the index id to its columns by expression.Column. // For normal index, the column id is enough, as we already have in Idx2ColUniqueIDs. But currently, mv index needs more // information to match the filter against the mv index columns, and we need this map to provide this information. - MVIdx2Columns map[int64][]*expression.Column + MVIdx2Columns map[int64][]*expression.Column } // TableMemoryUsage records tbl memory usage From 65c5878c8490673767970c745b68f8fdfce11480 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Fri, 29 Mar 2024 20:54:04 +0800 Subject: [PATCH 8/9] disable fieldalignment check for table.go --- build/nogo_config.json | 1 + 1 file changed, 1 insertion(+) diff --git a/build/nogo_config.json b/build/nogo_config.json index 576d7166d5bc1..e8db6c287f569 100644 --- a/build/nogo_config.json +++ b/build/nogo_config.json @@ -175,6 +175,7 @@ "fieldalignment": { "exclude_files": { "pkg/parser/parser.go": "parser/parser.go code", + "pkg/statistics/table.go": "disable this limitation that prevents us from splitting struct fields for clarity", "external/": "no need to vet third party code", ".*_generated\\.go$": "ignore generated code", ".*mock.go$": "ignore generated code", From 6758bd74561dd0d5a69b8a95421ce1255bb60647 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Mon, 1 Apr 2024 17:25:46 +0800 Subject: [PATCH 9/9] update var name according to comments --- pkg/planner/cardinality/cross_estimation.go | 8 +++---- pkg/planner/cardinality/row_count_column.go | 24 ++++++++++----------- pkg/planner/cardinality/row_count_index.go | 14 ++++++------ 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pkg/planner/cardinality/cross_estimation.go b/pkg/planner/cardinality/cross_estimation.go index df6052d2b2198..3aa618e3307df 100644 --- a/pkg/planner/cardinality/cross_estimation.go +++ b/pkg/planner/cardinality/cross_estimation.go @@ -139,7 +139,7 @@ func crossEstimateRowCount(sctx context.PlanContext, if col == nil || len(path.AccessConds) > 0 { return 0, false, corr } - colID := col.UniqueID + colUniqueID := col.UniqueID if corr < 0 { desc = !desc } @@ -152,11 +152,11 @@ func crossEstimateRowCount(sctx context.PlanContext, return 0, err == nil, corr } idxID := int64(-1) - idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colID] + idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colUniqueID] if idxExists && len(idxIDs) > 0 { idxID = idxIDs[0] } - rangeCounts, ok := getColumnRangeCounts(sctx, colID, ranges, dsTableStats.HistColl, idxID) + rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID) if !ok { return 0, false, corr } @@ -168,7 +168,7 @@ func crossEstimateRowCount(sctx context.PlanContext, if idxExists { rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges) } else { - rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colID, convertedRanges) + rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges) } if err != nil { return 0, false, corr diff --git a/pkg/planner/cardinality/row_count_column.go b/pkg/planner/cardinality/row_count_column.go index 4373cc86e41c6..fad100d82c358 100644 --- a/pkg/planner/cardinality/row_count_column.go +++ b/pkg/planner/cardinality/row_count_column.go @@ -33,27 +33,27 @@ func init() { } // GetRowCountByColumnRanges estimates the row count by a slice of Range. -func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colID int64, colRanges []*ranger.Range) (result float64, err error) { +func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) { var name string if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { debugtrace.EnterContextCommon(sctx) - debugTraceGetRowCountInput(sctx, colID, colRanges) + debugTraceGetRowCountInput(sctx, colUniqueID, colRanges) defer func() { debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result) debugtrace.LeaveContextCommon(sctx) }() } sc := sctx.GetSessionVars().StmtCtx - c, ok := coll.Columns[colID] - colInfoID := colID + c, ok := coll.Columns[colUniqueID] + colInfoID := colUniqueID if len(coll.UniqueID2colInfoID) > 0 { - colInfoID = coll.UniqueID2colInfoID[colID] + colInfoID = coll.UniqueID2colInfoID[colUniqueID] } recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) if c != nil && c.Info != nil { name = c.Info.Name.O } - if statistics.ColumnStatsIsInvalid(c, sctx, coll, colID) { + if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) { result, err = getPseudoRowCountByColumnRanges(sc.TypeCtx(), float64(coll.RealtimeCount), colRanges, 0) if err == nil && sc.EnableOptimizerCETrace && ok { ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats-Pseudo", uint64(result)) @@ -75,27 +75,27 @@ func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistCo } // GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange. -func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error) { +func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) { var name string if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { debugtrace.EnterContextCommon(sctx) - debugTraceGetRowCountInput(sctx, colID, intRanges) + debugTraceGetRowCountInput(sctx, colUniqueID, intRanges) defer func() { debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result) debugtrace.LeaveContextCommon(sctx) }() } sc := sctx.GetSessionVars().StmtCtx - c, ok := coll.Columns[colID] - colInfoID := colID + c, ok := coll.Columns[colUniqueID] + colInfoID := colUniqueID if len(coll.UniqueID2colInfoID) > 0 { - colInfoID = coll.UniqueID2colInfoID[colID] + colInfoID = coll.UniqueID2colInfoID[colUniqueID] } recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) if c != nil && c.Info != nil { name = c.Info.Name.O } - if statistics.ColumnStatsIsInvalid(c, sctx, coll, colID) { + if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) { if len(intRanges) == 0 { return 0, nil } diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go index 4b346bcb231f7..692696b3e1b9d 100644 --- a/pkg/planner/cardinality/row_count_index.go +++ b/pkg/planner/cardinality/row_count_index.go @@ -170,19 +170,19 @@ func getIndexRowCountForStatsV1(sctx context.PlanContext, coll *statistics.HistC } var count float64 var err error - colIDs := coll.Idx2ColUniqueIDs[idxID] - var colID int64 - if rangePosition >= len(colIDs) { - colID = -1 + colUniqueIDs := coll.Idx2ColUniqueIDs[idxID] + var colUniqueID int64 + if rangePosition >= len(colUniqueIDs) { + colUniqueID = -1 } else { - colID = colIDs[rangePosition] + colUniqueID = colUniqueIDs[rangePosition] } // prefer index stats over column stats - if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && len(idxIDs) > 0 { + if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 { idxID := idxIDs[0] count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}) } else { - count, err = GetRowCountByColumnRanges(sctx, coll, colID, []*ranger.Range{&rang}) + count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang}) } if err != nil { return 0, errors.Trace(err)