From 05e413e7646603b5ba7a72856c5065b6b11c2ebb Mon Sep 17 00:00:00 2001 From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:54:17 +0800 Subject: [PATCH] This is an automated cherry-pick of #52208 Signed-off-by: ti-chi-bot --- build/nogo_config.json | 1 + pkg/planner/cardinality/cross_estimation.go | 8 ++-- pkg/planner/cardinality/row_count_column.go | 37 ++++++++++++--- pkg/planner/cardinality/row_count_index.go | 18 +++---- pkg/planner/cardinality/selectivity.go | 25 ++++++++-- pkg/planner/cardinality/selectivity_test.go | 4 +- .../core/casetest/planstats/BUILD.bazel | 2 +- .../casetest/planstats/plan_stats_test.go | 45 ++++++++++++++++++ .../testdata/plan_stats_suite_in.json | 8 ++++ .../testdata/plan_stats_suite_out.json | 42 +++++++++++++++++ pkg/planner/core/exhaust_physical_plans.go | 2 +- pkg/planner/core/logical_plans.go | 4 +- pkg/planner/core/stats.go | 6 +-- pkg/statistics/table.go | 47 ++++++++++++++++++- 14 files changed, 217 insertions(+), 32 deletions(-) diff --git a/build/nogo_config.json b/build/nogo_config.json index ceafdfa6e83ef..83c36265b5c55 100644 --- a/build/nogo_config.json +++ b/build/nogo_config.json @@ -169,6 +169,7 @@ "fieldalignment": { "exclude_files": { "pkg/parser/parser.go": "parser/parser.go code", + "pkg/statistics/table.go": "disable this limitation that prevents us from splitting struct fields for clarity", "external/": "no need to vet third party code", ".*_generated\\.go$": "ignore generated code", ".*_/testmain\\.go$": "ignore code", diff --git a/pkg/planner/cardinality/cross_estimation.go b/pkg/planner/cardinality/cross_estimation.go index f25bad34acb56..850a0faa05e01 100644 --- a/pkg/planner/cardinality/cross_estimation.go +++ b/pkg/planner/cardinality/cross_estimation.go @@ -126,7 +126,7 @@ func crossEstimateRowCount(sctx sessionctx.Context, if col == nil || len(path.AccessConds) > 0 { return 0, false, corr } - colID := col.UniqueID + colUniqueID := col.UniqueID if corr < 0 { desc = !desc } @@ -139,11 +139,11 @@ func crossEstimateRowCount(sctx sessionctx.Context, return 0, err == nil, corr } idxID := int64(-1) - idxIDs, idxExists := dsStatsInfo.HistColl.ColID2IdxIDs[colID] + idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colUniqueID] if idxExists && len(idxIDs) > 0 { idxID = idxIDs[0] } - rangeCounts, ok := getColumnRangeCounts(sctx, colID, ranges, dsTableStats.HistColl, idxID) + rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID) if !ok { return 0, false, corr } @@ -155,7 +155,7 @@ func crossEstimateRowCount(sctx sessionctx.Context, if idxExists { rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges) } else { - rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colID, convertedRanges) + rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges) } if err != nil { return 0, false, corr diff --git a/pkg/planner/cardinality/row_count_column.go b/pkg/planner/cardinality/row_count_column.go index f9f075dc818f6..d135ac597f09d 100644 --- a/pkg/planner/cardinality/row_count_column.go +++ b/pkg/planner/cardinality/row_count_column.go @@ -33,24 +33,37 @@ func init() { } // GetRowCountByColumnRanges estimates the row count by a slice of Range. +<<<<<<< HEAD func GetRowCountByColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colID int64, colRanges []*ranger.Range) (result float64, err error) { +======= +func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) { +>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208)) var name string if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { debugtrace.EnterContextCommon(sctx) - debugTraceGetRowCountInput(sctx, colID, colRanges) + debugTraceGetRowCountInput(sctx, colUniqueID, colRanges) defer func() { debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result) debugtrace.LeaveContextCommon(sctx) }() } sc := sctx.GetSessionVars().StmtCtx - c, ok := coll.Columns[colID] - recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID) + c, ok := coll.Columns[colUniqueID] + colInfoID := colUniqueID + if len(coll.UniqueID2colInfoID) > 0 { + colInfoID = coll.UniqueID2colInfoID[colUniqueID] + } + recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) if c != nil && c.Info != nil { name = c.Info.Name.O } +<<<<<<< HEAD if !ok || c.IsInvalid(sctx, coll.Pseudo) { result, err = getPseudoRowCountByColumnRanges(sc, float64(coll.RealtimeCount), colRanges, 0) +======= + if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) { + result, err = getPseudoRowCountByColumnRanges(sc.TypeCtx(), float64(coll.RealtimeCount), colRanges, 0) +>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208)) if err == nil && sc.EnableOptimizerCETrace && ok { ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats-Pseudo", uint64(result)) } @@ -71,23 +84,35 @@ func GetRowCountByColumnRanges(sctx sessionctx.Context, coll *statistics.HistCol } // GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange. +<<<<<<< HEAD func GetRowCountByIntColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error) { +======= +func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) { +>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208)) var name string if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { debugtrace.EnterContextCommon(sctx) - debugTraceGetRowCountInput(sctx, colID, intRanges) + debugTraceGetRowCountInput(sctx, colUniqueID, intRanges) defer func() { debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result) debugtrace.LeaveContextCommon(sctx) }() } sc := sctx.GetSessionVars().StmtCtx - c, ok := coll.Columns[colID] - recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID) + c, ok := coll.Columns[colUniqueID] + colInfoID := colUniqueID + if len(coll.UniqueID2colInfoID) > 0 { + colInfoID = coll.UniqueID2colInfoID[colUniqueID] + } + recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) if c != nil && c.Info != nil { name = c.Info.Name.O } +<<<<<<< HEAD if !ok || c.IsInvalid(sctx, coll.Pseudo) { +======= + if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) { +>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208)) if len(intRanges) == 0 { return 0, nil } diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go index 06ff7c9627cf3..e3334dc6455ee 100644 --- a/pkg/planner/cardinality/row_count_index.go +++ b/pkg/planner/cardinality/row_count_index.go @@ -168,19 +168,19 @@ func getIndexRowCountForStatsV1(sctx sessionctx.Context, coll *statistics.HistCo } var count float64 var err error - colIDs := coll.Idx2ColumnIDs[idxID] - var colID int64 - if rangePosition >= len(colIDs) { - colID = -1 + colUniqueIDs := coll.Idx2ColUniqueIDs[idxID] + var colUniqueID int64 + if rangePosition >= len(colUniqueIDs) { + colUniqueID = -1 } else { - colID = colIDs[rangePosition] + colUniqueID = colUniqueIDs[rangePosition] } // prefer index stats over column stats - if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 { + if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 { idxID := idxIDs[0] count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}) } else { - count, err = GetRowCountByColumnRanges(sctx, coll, colID, []*ranger.Range{&rang}) + count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang}) } if err != nil { return 0, errors.Trace(err) @@ -409,7 +409,7 @@ func expBackoffEstimation(sctx sessionctx.Context, idx *statistics.Index, coll * Collators: make([]collate.Collator, 1), }, } - colsIDs := coll.Idx2ColumnIDs[idx.Histogram.ID] + colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID] singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal)) // The following codes uses Exponential Backoff to reduce the impact of independent assumption. It works like: // 1. Calc the selectivity of each column. @@ -434,7 +434,7 @@ func expBackoffEstimation(sctx sessionctx.Context, idx *statistics.Index, coll * foundStats = true count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan) } - if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 { + if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 { // Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call // `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This // check avoids infinite recursion. diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go index d2d5d8ff8041a..c8b32308e66fc 100644 --- a/pkg/planner/cardinality/selectivity.go +++ b/pkg/planner/cardinality/selectivity.go @@ -157,7 +157,26 @@ func Selectivity( slices.Sort(idxIDs) for _, id := range idxIDs { idxStats := coll.Indices[id] +<<<<<<< HEAD idxCols := findPrefixOfIndexByCol(extractedCols, coll.Idx2ColumnIDs[id], id2Paths[idxStats.ID]) +======= + idxInfo := idxStats.Info + if idxInfo.MVIndex { + totalSelectivity, mask, ok := getMaskAndSelectivityForMVIndex(ctx, coll, id, remainedExprs) + if !ok { + continue + } + nodes = append(nodes, &StatsNode{ + Tp: IndexType, + ID: id, + mask: mask, + numCols: len(idxInfo.Columns), + Selectivity: totalSelectivity, + }) + continue + } + idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColUniqueIDs[id], id2Paths[idxStats.ID]) +>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208)) if len(idxCols) > 0 { lengths := make([]int, 0, len(idxCols)) for i := 0; i < len(idxCols) && i < len(idxStats.Info.Columns); i++ { @@ -777,7 +796,7 @@ func findAvailableStatsForCol(sctx sessionctx.Context, coll *statistics.HistColl return false, uniqueID } // try to find available stats in single column index stats (except for prefix index) - for idxStatsIdx, cols := range coll.Idx2ColumnIDs { + for idxStatsIdx, cols := range coll.Idx2ColUniqueIDs { if len(cols) == 1 && cols[0] == uniqueID { idxStats, ok := coll.Indices[idxStatsIdx] if ok && @@ -826,7 +845,7 @@ func getEqualCondSelectivity(sctx sessionctx.Context, coll *statistics.HistColl, return outOfRangeEQSelectivity(sctx, idx.NDV, coll.RealtimeCount, int64(idx.TotalRowCount())), nil } // The equal condition only uses prefix columns of the index. - colIDs := coll.Idx2ColumnIDs[idx.ID] + colIDs := coll.Idx2ColUniqueIDs[idx.ID] var ndv int64 for i, colID := range colIDs { if i >= usedColsLen { @@ -908,7 +927,7 @@ func crossValidationSelectivity( }() } minRowCount = math.MaxFloat64 - cols := coll.Idx2ColumnIDs[idx.ID] + cols := coll.Idx2ColUniqueIDs[idx.ID] crossValidationSelectivity = 1.0 totalRowCount := idx.TotalRowCount() for i, colID := range cols { diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go index 2706e1d89e6da..a241e38cbd2a3 100644 --- a/pkg/planner/cardinality/selectivity_test.go +++ b/pkg/planner/cardinality/selectivity_test.go @@ -893,8 +893,8 @@ func generateMapsForMockStatsTbl(statsTbl *statistics.Table) { for _, idxIDs := range colID2IdxIDs { slices.Sort(idxIDs) } - statsTbl.Idx2ColumnIDs = idx2Columns - statsTbl.ColID2IdxIDs = colID2IdxIDs + statsTbl.Idx2ColUniqueIDs = idx2Columns + statsTbl.ColUniqueID2IdxIDs = colID2IdxIDs } func TestIssue39593(t *testing.T) { diff --git a/pkg/planner/core/casetest/planstats/BUILD.bazel b/pkg/planner/core/casetest/planstats/BUILD.bazel index 094d33c59f3ac..4ecf0a6a2dcfb 100644 --- a/pkg/planner/core/casetest/planstats/BUILD.bazel +++ b/pkg/planner/core/casetest/planstats/BUILD.bazel @@ -9,7 +9,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 4, + shard_count = 5, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/core/casetest/planstats/plan_stats_test.go b/pkg/planner/core/casetest/planstats/plan_stats_test.go index dde452f1ba820..3e24920644f7d 100644 --- a/pkg/planner/core/casetest/planstats/plan_stats_test.go +++ b/pkg/planner/core/casetest/planstats/plan_stats_test.go @@ -400,3 +400,48 @@ func TestCollectDependingVirtualCols(t *testing.T) { require.Equal(t, output[i].OutputColNames, cols) } } + +func TestPartialStatsInExplain(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))") + tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)") + tk.MustExec("create table t2(a int, primary key(a))") + tk.MustExec("insert into t2 values (1),(2),(3)") + tk.MustExec( + "create table tp(a int, b int, c int, index ic(c)) partition by range(a)" + + "(partition p0 values less than (10)," + + "partition p1 values less than (20)," + + "partition p2 values less than maxvalue)", + ) + tk.MustExec("insert into tp values (1,1,1),(2,2,2),(13,13,13),(14,14,14),(25,25,25),(36,36,36)") + + oriLease := dom.StatsHandle().Lease() + dom.StatsHandle().SetLease(1) + defer func() { + dom.StatsHandle().SetLease(oriLease) + }() + tk.MustExec("analyze table t") + tk.MustExec("analyze table t2") + tk.MustExec("analyze table tp") + tk.RequireNoError(dom.StatsHandle().Update(dom.InfoSchema())) + tk.MustQuery("explain select * from tp where a = 1") + tk.MustExec("set @@tidb_stats_load_sync_wait = 0") + var ( + input []string + output []struct { + Query string + Result []string + } + ) + testData := GetPlanStatsData() + testData.LoadTestCases(t, &input, &output) + for i, sql := range input { + testdata.OnRecord(func() { + output[i].Query = input[i] + output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows()) + }) + tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...)) + } +} diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json index c9496da4c8a0a..8f209105e6965 100644 --- a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json +++ b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json @@ -62,5 +62,13 @@ ] } ] + }, + { + "name": "TestPartialStatsInExplain", + "cases": [ + "explain format = brief select * from tp where b = 10", + "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c", + "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c" + ] } ] diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json index 93552f8bed709..92f1647bb88d7 100644 --- a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json +++ b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json @@ -101,5 +101,47 @@ ] } ] + }, + { + "Name": "TestPartialStatsInExplain", + "Cases": [ + { + "Query": "explain format = brief select * from tp where b = 10", + "Result": [ + "TableReader 0.01 root partition:all data:Selection", + "└─Selection 0.01 cop[tikv] eq(test.tp.b, 10)", + " └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[b:allEvicted]" + ] + }, + { + "Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c", + "Result": [ + "Projection 0.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c", + "└─HashJoin 0.00 root inner join, equal:[eq(test.tp.c, test.t.b)]", + " ├─TableReader(Build) 0.00 root partition:p1 data:Selection", + " │ └─Selection 0.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))", + " │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]", + " └─TableReader(Probe) 3.00 root data:Selection", + " └─Selection 3.00 cop[tikv] not(isnull(test.t.b))", + " └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]" + ] + }, + { + "Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c", + "Result": [ + "HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]", + "├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)", + "│ ├─TableReader(Build) 0.33 root data:Selection", + "│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))", + "│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]", + "│ └─IndexLookUp(Probe) 0.33 root partition:p0 ", + "│ ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))", + "│ │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]", + "│ └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]", + "└─TableReader(Probe) 1.00 root data:TableRangeScan", + " └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]" + ] + } + ] } ] diff --git a/pkg/planner/core/exhaust_physical_plans.go b/pkg/planner/core/exhaust_physical_plans.go index f78809ff0b097..8fab591826777 100644 --- a/pkg/planner/core/exhaust_physical_plans.go +++ b/pkg/planner/core/exhaust_physical_plans.go @@ -1211,7 +1211,7 @@ func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.Hist // 2. Try to get NDV from index stats. // Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is // equal or less than the corresponding normal index, and that's safe here since we want a lower bound. - for idxID, idxCols := range histColl.Idx2ColumnIDs { + for idxID, idxCols := range histColl.Idx2ColUniqueIDs { if len(idxCols) != len(colUIDs) { continue } diff --git a/pkg/planner/core/logical_plans.go b/pkg/planner/core/logical_plans.go index b10fdc3a11d5c..90fa385f9873d 100644 --- a/pkg/planner/core/logical_plans.go +++ b/pkg/planner/core/logical_plans.go @@ -1830,8 +1830,8 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex path.IdxCols = append(path.IdxCols, handleCol) path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength) // Also updates the map that maps the index id to its prefix column ids. - if len(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID]) == len(path.Index.Columns) { - ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID], handleCol.UniqueID) + if len(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) { + ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID) } } } diff --git a/pkg/planner/core/stats.go b/pkg/planner/core/stats.go index 19b66dea6f535..9c81c95ec4454 100644 --- a/pkg/planner/core/stats.go +++ b/pkg/planner/core/stats.go @@ -175,8 +175,8 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property. tbl := ds.tableStats.HistColl ndvs := make([]property.GroupNDV, 0, len(colGroups)) for idxID, idx := range tbl.Indices { - colsLen := len(tbl.Idx2ColumnIDs[idxID]) - // tbl.Idx2ColumnIDs may only contain the prefix of index columns. + colsLen := len(tbl.Idx2ColUniqueIDs[idxID]) + // tbl.Idx2ColUniqueIDs may only contain the prefix of index columns. // But it may exceeds the total index since the index would contain the handle column if it's not a unique index. // We append the handle at fillIndexPath. if colsLen < len(idx.Info.Columns) { @@ -185,7 +185,7 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property. colsLen-- } idxCols := make([]int64, colsLen) - copy(idxCols, tbl.Idx2ColumnIDs[idxID]) + copy(idxCols, tbl.Idx2ColUniqueIDs[idxID]) slices.Sort(idxCols) for _, g := range colGroups { // We only want those exact matches. diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 64f017e17a353..36768e2a4b46a 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -100,6 +100,7 @@ const ( // HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity. type HistColl struct { +<<<<<<< HEAD Columns map[int64]*Column Indices map[int64]*Index // Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner. @@ -107,6 +108,13 @@ type HistColl struct { // ColID2IdxIDs maps the column id to a list index ids whose first column is it. It's used to calculate the selectivity in planner. ColID2IdxIDs map[int64][]int64 PhysicalID int64 +======= + // Note that when used in a query, Column use UniqueID as the key while Indices use the index ID in the + // metadata. (See GenerateHistCollFromColumnInfo() for details) + Columns map[int64]*Column + Indices map[int64]*Index + PhysicalID int64 +>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208)) // TODO: add AnalyzeCount here RealtimeCount int64 // RealtimeCount is the current table row count, maintained by applying stats delta based on AnalyzeCount. ModifyCount int64 // Total modify count in a table. @@ -115,6 +123,27 @@ type HistColl struct { // The physical id is used when try to load column stats from storage. HavePhysicalID bool Pseudo bool +<<<<<<< HEAD +======= + + /* + Fields below are only used in a query, like for estimation, and they will be useless when stored in + the stats cache. (See GenerateHistCollFromColumnInfo() for details) + */ + + CanNotTriggerLoad bool + // Idx2ColUniqueIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner. + Idx2ColUniqueIDs map[int64][]int64 + // ColUniqueID2IdxIDs maps the column UniqueID to a list index ids whose first column is it. + // It's used to calculate the selectivity in planner. + ColUniqueID2IdxIDs map[int64][]int64 + // UniqueID2colInfoID maps the column UniqueID to its ID in the metadata. + UniqueID2colInfoID map[int64]int64 + // MVIdx2Columns maps the index id to its columns by expression.Column. + // For normal index, the column id is enough, as we already have in Idx2ColUniqueIDs. But currently, mv index needs more + // information to match the filter against the mv index columns, and we need this map to provide this information. + MVIdx2Columns map[int64][]*expression.Column +>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208)) } // TableMemoryUsage records tbl memory usage @@ -547,13 +576,15 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl { return newColl } -// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxIDs and IdxID2ColIDs is built from the given parameter. +// GenerateHistCollFromColumnInfo generates a new HistColl whose ColUniqueID2IdxIDs and Idx2ColUniqueIDs is built from the given parameter. func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, columns []*expression.Column) *HistColl { newColHistMap := make(map[int64]*Column) colInfoID2UniqueID := make(map[int64]int64, len(columns)) + uniqueID2colInfoID := make(map[int64]int64, len(columns)) idxID2idxInfo := make(map[int64]*model.IndexInfo) for _, col := range columns { colInfoID2UniqueID[col.ID] = col.UniqueID + uniqueID2colInfoID[col.UniqueID] = col.ID } for id, colHist := range coll.Columns { uniqueID, ok := colInfoID2UniqueID[id] @@ -593,6 +624,7 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, c slices.Sort(idxIDs) } newColl := &HistColl{ +<<<<<<< HEAD PhysicalID: coll.PhysicalID, HavePhysicalID: coll.HavePhysicalID, Pseudo: coll.Pseudo, @@ -602,6 +634,19 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, c Indices: newIdxHistMap, ColID2IdxIDs: colID2IdxIDs, Idx2ColumnIDs: idx2Columns, +======= + PhysicalID: coll.PhysicalID, + HavePhysicalID: coll.HavePhysicalID, + Pseudo: coll.Pseudo, + RealtimeCount: coll.RealtimeCount, + ModifyCount: coll.ModifyCount, + Columns: newColHistMap, + Indices: newIdxHistMap, + ColUniqueID2IdxIDs: colID2IdxIDs, + Idx2ColUniqueIDs: idx2Columns, + UniqueID2colInfoID: uniqueID2colInfoID, + MVIdx2Columns: mvIdx2Columns, +>>>>>>> 21e9d3cb40a (planner, statistics: use the correct column ID when recording stats loading status (#52208)) } return newColl }