diff --git a/executor/test/analyzetest/analyze_test.go b/executor/test/analyzetest/analyze_test.go index 0728cae5657f6..3b97213a740b9 100644 --- a/executor/test/analyzetest/analyze_test.go +++ b/executor/test/analyzetest/analyze_test.go @@ -3226,8 +3226,9 @@ func TestAnalyzeMVIndex(t *testing.T) { "6 test t analyze index ij_char 189 finished", )) - // 3. check stats loading status and async load - // 3.1. now, stats on all indexes should be allEvicted, but these queries should trigger async loading + // 3. test stats loading + // 3.1. turn off sync loading, stats on all indexes should be allEvicted, but these queries should trigger async loading + tk.MustExec("set session tidb_stats_load_sync_wait = 0") tk.MustQuery("explain format = brief select * from t where 1 member of (j->'$.signed')").Check(testkit.Rows( "IndexMerge 0.03 root type: union", "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_signed(cast(json_extract(`j`, _utf8mb4'$.signed') as signed array)) range:[1,1], keep order:false, stats:partial[ia:allEvicted, ij_signed:allEvicted, j:unInitialized]", @@ -3282,7 +3283,34 @@ func TestAnalyzeMVIndex(t *testing.T) { "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[j:unInitialized]", )) + // 3.4. clean up the stats and re-analyze the table + tk.MustExec("drop stats t") + tk.MustExec("analyze table t with 1 samplerate, 3 topn") + // 3.5. 
turn on the sync loading, stats on mv indexes should be loaded + tk.MustExec("set session tidb_stats_load_sync_wait = 1000") + tk.MustQuery("explain format = brief select * from t where 1 member of (j->'$.signed')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_signed(cast(json_extract(`j`, _utf8mb4'$.signed') as signed array)) range:[1,1], keep order:false, stats:partial[ia:allEvicted, j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where 1 member of (j->'$.unsigned')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_unsigned(cast(json_extract(`j`, _utf8mb4'$.unsigned') as unsigned array)) range:[1,1], keep order:false, stats:partial[ia:allEvicted, j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where '1' member of (j->'$.bin')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_binary(cast(json_extract(`j`, _utf8mb4'$.bin') as binary(50) array)) range:[0x31,0x31], keep order:false, stats:partial[ia:allEvicted, j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where '1' member of (j->'$.char')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_char(cast(json_extract(`j`, _utf8mb4'$.char') as char(50) array)) range:[0x31,0x31], keep order:false, stats:partial[ia:allEvicted, j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, 
stats:partial[ia:allEvicted, j:unInitialized]", + )) + // 4. check stats content in the memory + require.NoError(t, h.LoadNeededHistograms()) tk.MustQuery("show stats_meta").CheckAt([]int{0, 1, 4, 5}, testkit.Rows("test t 0 27")) tk.MustQuery("show stats_histograms").CheckAt([]int{0, 1, 3, 4, 6, 7, 8, 9, 10}, testkit.Rows( // db_name, table_name, column_name, is_index, distinct_count, null_count, avg_col_size, correlation, load_status @@ -3290,7 +3318,7 @@ func TestAnalyzeMVIndex(t *testing.T) { "test t ia 1 1 0 0 0 allLoaded", "test t ij_signed 1 11 0 0 0 allLoaded", "test t ij_unsigned 1 6 0 0 0 allLoaded", - "test t ij_double 1 7 0 0 0 allEvicted", + "test t ij_double 1 7 0 0 0 allLoaded", "test t ij_binary 1 15 0 0 0 allLoaded", "test t ij_char 1 11 0 0 0 allLoaded", )) @@ -3303,6 +3331,9 @@ func TestAnalyzeMVIndex(t *testing.T) { "test t ij_unsigned 1 0 27", "test t ij_unsigned 1 3 27", "test t ij_unsigned 1 4 27", + "test t ij_double 1 -21.5 27", + "test t ij_double 1 -12.000005 8", + "test t ij_double 1 0 27", "test t ij_binary 1 0000 26", "test t ij_binary 1 1234 19", "test t ij_binary 1 3796 1", @@ -3323,6 +3354,10 @@ func TestAnalyzeMVIndex(t *testing.T) { "test t ij_unsigned 1 0 16 16 12 12 0", "test t ij_unsigned 1 1 43 27 600 600 0", "test t ij_unsigned 1 2 54 11 3112 3112 0", + "test t ij_double 1 0 19 19 0.000005 0.000005 0", + "test t ij_double 1 1 46 27 2.15 2.15 0", + "test t ij_double 1 2 73 27 10.555555 10.555555 0", + "test t ij_double 1 3 92 19 10.9876 10.9876 0", "test t ij_binary 1 0 8 8 5678 5678 0", "test t ij_binary 1 1 35 27 aaaaaa aaaaaa 0", "test t ij_binary 1 2 59 24 asdf asdf 0", diff --git a/infoschema/infoschema.go b/infoschema/infoschema.go index 35b0c11b436b7..73ca30c2e31a6 100644 --- a/infoschema/infoschema.go +++ b/infoschema/infoschema.go @@ -761,3 +761,17 @@ func (ts *SessionExtendedInfoSchema) DetachTemporaryTableInfoSchema() *SessionEx MdlTables: ts.MdlTables, } } + +// FindTableByTblOrPartID looks for table.Table for the 
given id in the InfoSchema. +// The id can be either a table id or a partition id. +// If the id is a table id, the corresponding table.Table will be returned, and the second return value is nil. +// If the id is a partition id, the corresponding table.Table and PartitionDefinition will be returned. +// If the id is not found in the InfoSchema, nil will be returned for both return values. +func FindTableByTblOrPartID(is InfoSchema, id int64) (table.Table, *model.PartitionDefinition) { + tbl, ok := is.TableByID(id) + if ok { + return tbl, nil + } + tbl, _, partDef := is.FindTableByPartitionID(id) + return tbl, partDef +} diff --git a/planner/core/BUILD.bazel b/planner/core/BUILD.bazel index 20d64259f6261..f547b533ca764 100644 --- a/planner/core/BUILD.bazel +++ b/planner/core/BUILD.bazel @@ -214,7 +214,6 @@ go_test( "plan_cost_ver1_test.go", "plan_cost_ver2_test.go", "plan_replayer_capture_test.go", - "plan_stats_test.go", "plan_test.go", "plan_to_pb_test.go", "planbuilder_test.go", @@ -236,7 +235,6 @@ go_test( "//config", "//domain", "//errno", - "//executor", "//expression", "//expression/aggregation", "//infoschema", @@ -256,11 +254,9 @@ go_test( "//planner/util", "//session", "//sessionctx", - "//sessionctx/stmtctx", "//sessionctx/variable", "//sessiontxn", "//statistics", - "//statistics/handle", "//table", "//testkit", "//testkit/ddlhelper", diff --git a/planner/core/casetest/planstats/BUILD.bazel b/planner/core/casetest/planstats/BUILD.bazel new file mode 100644 index 0000000000000..954d4734fe294 --- /dev/null +++ b/planner/core/casetest/planstats/BUILD.bazel @@ -0,0 +1,34 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_test") + +go_test( + name = "planstats_test", + timeout = "short", + srcs = [ + "main_test.go", + "plan_stats_test.go", + ], + data = glob(["testdata/**"]), + flaky = True, + shard_count = 4, + deps = [ + "//config", + "//domain", + "//executor", + "//parser", + "//parser/model", + "//planner", + "//planner/core", + "//sessionctx", + 
"//sessionctx/stmtctx", + "//statistics", + "//statistics/handle", + "//table", + "//testkit", + "//testkit/testdata", + "//testkit/testmain", + "//testkit/testsetup", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_stretchr_testify//require", + "@org_uber_go_goleak//:goleak", + ], +) diff --git a/planner/core/casetest/planstats/main_test.go b/planner/core/casetest/planstats/main_test.go new file mode 100644 index 0000000000000..d474b1eb0c058 --- /dev/null +++ b/planner/core/casetest/planstats/main_test.go @@ -0,0 +1,54 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package planstats_test + +import ( + "flag" + "testing" + + "github.com/pingcap/tidb/testkit/testdata" + "github.com/pingcap/tidb/testkit/testmain" + "github.com/pingcap/tidb/testkit/testsetup" + "go.uber.org/goleak" +) + +var testDataMap = make(testdata.BookKeeper) + +func TestMain(m *testing.M) { + testsetup.SetupForCommonTest() + + flag.Parse() + testDataMap.LoadTestSuiteData("testdata", "plan_stats_suite") + + opts := []goleak.Option{ + goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"), + goleak.IgnoreTopFunction("github.com/lestrrat-go/httprc.runFetchWorker"), + goleak.IgnoreTopFunction("go.etcd.io/etcd/client/pkg/v3/logutil.(*MergeLogger).outputLoop"), + goleak.IgnoreTopFunction("gopkg.in/natefinch/lumberjack%2ev2.(*Logger).millRun"), + goleak.IgnoreTopFunction("github.com/tikv/client-go/v2/txnkv/transaction.keepAlive"), + goleak.IgnoreTopFunction("go.opencensus.io/stats/view.(*worker).start"), + } + + callback := func(i int) int { + testDataMap.GenerateOutputIfNeeded() + return i + } + + goleak.VerifyTestMain(testmain.WrapTestingM(m, callback), opts...) +} + +func GetPlanStatsData() testdata.TestData { + return testDataMap["plan_stats_suite"] +} diff --git a/planner/core/plan_stats_test.go b/planner/core/casetest/planstats/plan_stats_test.go similarity index 84% rename from planner/core/plan_stats_test.go rename to planner/core/casetest/planstats/plan_stats_test.go index 3de8b4dfc0fa5..a19539fe3ccbf 100644 --- a/planner/core/plan_stats_test.go +++ b/planner/core/casetest/planstats/plan_stats_test.go @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package core_test +package planstats_test import ( "context" "fmt" + "slices" "testing" "time" @@ -32,7 +33,9 @@ import ( "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/statistics/handle" + "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/testkit" + "github.com/pingcap/tidb/testkit/testdata" "github.com/stretchr/testify/require" ) @@ -324,3 +327,76 @@ func TestPlanStatsStatusRecord(t *testing.T) { } } } + +func TestCollectDependingVirtualCols(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int, c json," + + "index ic_char((cast(c->'$' as char(32) array)))," + + "index ic_unsigned((cast(c->'$.unsigned' as unsigned array)))," + + "index ic_signed((cast(c->'$.signed' as unsigned array)))" + + ")") + tk.MustExec("create table t1(a int, b int, c int," + + "vab int as (a + b) virtual," + + "vc int as (c - 5) virtual," + + "vvc int as (b - vc) virtual," + + "vvabvvc int as (vab * vvc) virtual," + + "index ib((b + 1))," + + "index icvab((c + vab))," + + "index ivvcvab((vvc / vab))" + + ")") + + is := dom.InfoSchema() + tableNames := []string{"t", "t1"} + tblName2TblID := make(map[string]int64) + tblID2Tbl := make(map[int64]table.Table) + for _, tblName := range tableNames { + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr(tblName)) + require.NoError(t, err) + tblName2TblID[tblName] = tbl.Meta().ID + tblID2Tbl[tbl.Meta().ID] = tbl + } + + var input []struct { + TableName string + InputColNames []string + } + var output []struct { + TableName string + InputColNames []string + OutputColNames []string + } + testData := GetPlanStatsData() + testData.LoadTestCases(t, &input, &output) + + for i, testCase := range input { + // prepare the input + tbl := tblID2Tbl[tblName2TblID[testCase.TableName]] + require.NotNil(t, tbl) + neededItems := make([]model.TableItemID, 0, 
len(testCase.InputColNames)) + for _, colName := range testCase.InputColNames { + col := tbl.Meta().FindPublicColumnByName(colName) + require.NotNil(t, col) + neededItems = append(neededItems, model.TableItemID{TableID: tbl.Meta().ID, ID: col.ID}) + } + + // call the function + res := plannercore.CollectDependingVirtualCols(tblID2Tbl, neededItems) + + // record and check the output + cols := make([]string, 0, len(res)) + for _, tblColID := range res { + colName := tbl.Meta().FindColumnNameByID(tblColID.ID) + require.NotEmpty(t, colName) + cols = append(cols, colName) + } + slices.Sort(cols) + testdata.OnRecord(func() { + output[i].TableName = testCase.TableName + output[i].InputColNames = testCase.InputColNames + output[i].OutputColNames = cols + }) + require.Equal(t, output[i].OutputColNames, cols) + } +} diff --git a/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json b/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json new file mode 100644 index 0000000000000..c9496da4c8a0a --- /dev/null +++ b/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json @@ -0,0 +1,66 @@ +[ + { + "name": "TestCollectDependingVirtualCols", + "cases": [ + { + "tableName": "t", + "inputColNames": [ + "a", + "b" + ] + }, + { + "tableName": "t", + "inputColNames": [ + "c" + ] + }, + { + "tableName": "t", + "inputColNames": [ + "b", + "c" + ] + }, + { + "tableName": "t1", + "inputColNames": [ + "a" + ] + }, + { + "tableName": "t1", + "inputColNames": [ + "b" + ] + }, + { + "tableName": "t1", + "inputColNames": [ + "c" + ] + }, + { + "tableName": "t1", + "inputColNames": [ + "vab" + ] + }, + { + "tableName": "t1", + "inputColNames": [ + "vab", + "c" + ] + }, + { + "tableName": "t1", + "inputColNames": [ + "vc", + "c", + "vvc" + ] + } + ] + } +] diff --git a/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json b/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json new file mode 100644 index 0000000000000..93552f8bed709 --- 
/dev/null +++ b/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json @@ -0,0 +1,105 @@ +[ + { + "Name": "TestCollectDependingVirtualCols", + "Cases": [ + { + "TableName": "t", + "InputColNames": [ + "a", + "b" + ], + "OutputColNames": [] + }, + { + "TableName": "t", + "InputColNames": [ + "c" + ], + "OutputColNames": [ + "_v$_ic_char_0", + "_v$_ic_signed_0", + "_v$_ic_unsigned_0" + ] + }, + { + "TableName": "t", + "InputColNames": [ + "b", + "c" + ], + "OutputColNames": [ + "_v$_ic_char_0", + "_v$_ic_signed_0", + "_v$_ic_unsigned_0" + ] + }, + { + "TableName": "t1", + "InputColNames": [ + "a" + ], + "OutputColNames": [ + "vab" + ] + }, + { + "TableName": "t1", + "InputColNames": [ + "b" + ], + "OutputColNames": [ + "_v$_ib_0", + "vab", + "vvc" + ] + }, + { + "TableName": "t1", + "InputColNames": [ + "c" + ], + "OutputColNames": [ + "_v$_icvab_0", + "vc" + ] + }, + { + "TableName": "t1", + "InputColNames": [ + "vab" + ], + "OutputColNames": [ + "_v$_icvab_0", + "_v$_ivvcvab_0", + "vvabvvc" + ] + }, + { + "TableName": "t1", + "InputColNames": [ + "vab", + "c" + ], + "OutputColNames": [ + "_v$_icvab_0", + "_v$_ivvcvab_0", + "vc", + "vvabvvc" + ] + }, + { + "TableName": "t1", + "InputColNames": [ + "vc", + "c", + "vvc" + ], + "OutputColNames": [ + "_v$_icvab_0", + "_v$_ivvcvab_0", + "vvabvvc" + ] + } + ] + } +] diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 0a7fa6454fc94..21e0906088f93 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -2338,10 +2338,7 @@ func decodeKeyFromString(ctx sessionctx.Context, s string) string { sc.AppendWarning(errors.Errorf("infoschema not found when decoding key: %X", key)) return s } - tbl, _ := is.TableByID(tableID) - if tbl == nil { - tbl, _, _ = is.FindTableByPartitionID(tableID) - } + tbl, _ := infoschema.FindTableByTblOrPartID(is, tableID) loc := ctx.GetSessionVars().Location() if tablecodec.IsRecordKey(key) { ret, err := 
decodeRecordKey(key, tableID, tbl, loc) diff --git a/planner/core/optimizer.go b/planner/core/optimizer.go index df38bb17ca41c..2b163d577ebfb 100644 --- a/planner/core/optimizer.go +++ b/planner/core/optimizer.go @@ -370,12 +370,7 @@ func refineCETrace(sctx sessionctx.Context) { traceRecords := stmtCtx.OptimizerCETrace is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) for _, rec := range traceRecords { - tbl, ok := is.TableByID(rec.TableID) - if ok { - rec.TableName = tbl.Meta().Name.O - continue - } - tbl, _, _ = is.FindTableByPartitionID(rec.TableID) + tbl, _ := infoschema.FindTableByTblOrPartID(is, rec.TableID) if tbl != nil { rec.TableName = tbl.Meta().Name.O continue diff --git a/planner/core/plan_stats.go b/planner/core/plan_stats.go index a4f579459f249..a650d72b0935b 100644 --- a/planner/core/plan_stats.go +++ b/planner/core/plan_stats.go @@ -23,7 +23,9 @@ import ( "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/variable" + "github.com/pingcap/tidb/sessiontxn" "github.com/pingcap/tidb/statistics" + "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/mathutil" "go.uber.org/zap" @@ -45,7 +47,25 @@ func (collectPredicateColumnsPoint) optimize(_ context.Context, plan LogicalPlan if !histNeeded { return plan, nil } - histNeededIndices := collectSyncIndices(plan.SCtx(), histNeededColumns) + + // Prepare the table metadata to avoid repeatedly fetching from the infoSchema below. 
+ is := sessiontxn.GetTxnManager(plan.SCtx()).GetTxnInfoSchema() + tblID2Tbl := make(map[int64]table.Table) + for _, neededCol := range histNeededColumns { + tbl, _ := infoschema.FindTableByTblOrPartID(is, neededCol.TableID) + if tbl == nil { + continue + } + tblID2Tbl[neededCol.TableID] = tbl + } + + // collect needed virtual columns from already needed columns + // Note that we use the dependingVirtualCols only to collect needed index stats, but not to trigger stats loading on + // the virtual columns themselves. It's because virtual columns themselves don't have statistics, while expression + // indexes, which are indexes on virtual columns, have statistics. We don't waste the resource here now. + dependingVirtualCols := CollectDependingVirtualCols(tblID2Tbl, histNeededColumns) + + histNeededIndices := collectSyncIndices(plan.SCtx(), append(histNeededColumns, dependingVirtualCols...), tblID2Tbl) histNeededItems := collectHistNeededItems(histNeededColumns, histNeededIndices) if histNeeded && len(histNeededItems) > 0 { err := RequestLoadStats(plan.SCtx(), histNeededItems, syncWait) @@ -124,24 +144,103 @@ func SyncWaitStatsLoad(plan LogicalPlan) error { return nil } +// CollectDependingVirtualCols collects the virtual columns that depend on the needed columns, and returns them in a new slice. +// +// Why do we need this? +// It's mainly for stats sync loading. +// Currently, virtual columns themselves don't have statistics. But expression indexes, which are indexes on virtual +// columns, have statistics. We need to collect needed virtual columns, then needed expression index stats can be +// collected for sync loading. +// In normal cases, if a virtual column can be used, which means related statistics may be needed, the corresponding +// expressions in the query must have already been replaced with the virtual column before here. 
So we just need to treat +// them like normal columns in stats sync loading, which means we just extract the Column from the expressions, and the +// virtual columns we want will be there. +// However, in some cases (the mv index case now), the expressions are not replaced with the virtual columns before here. +// Instead, we match the expression in the query against the expression behind the virtual columns after here when +// building the access paths. This means we are unable to know what virtual columns will be needed by just extracting +// the Column from the expressions here. So we need to manually collect the virtual columns that may be needed. +// +// Note 1: As long as a virtual column depends on the needed columns, it will be collected. This could collect some virtual +// columns that are not actually needed. +// It's OK because that's how sync loading is expected to work. Sync loading only needs to ensure all actually needed stats are +// triggered to be loaded. Other logic of sync loading also works like this. +// If we want to collect only the virtual columns that are actually needed, we need to make the checking logic here exactly +// the same as the logic for generating the access paths, which will make the logic here very complicated. +// +// Note 2: Only direct dependencies are considered here. +// If a virtual column depends on another virtual column, and the latter depends on the needed columns, then the former +// will not be collected. +// For example: create table t(a int, b int, c int as (a+b), d int as (c+1)); If a is needed, then c will be collected, +// but d will not be collected. +// It's because currently it's impossible that statistics related to indirectly depending columns are actually needed. +// If we need to check indirect dependency some day, we can easily extend the logic here. 
+func CollectDependingVirtualCols(tblID2Tbl map[int64]table.Table, neededItems []model.TableItemID) []model.TableItemID { + generatedCols := make([]model.TableItemID, 0) + + // group the neededItems by table id + tblID2neededColIDs := make(map[int64][]int64, len(tblID2Tbl)) + for _, item := range neededItems { + if item.IsIndex { + continue + } + tblID2neededColIDs[item.TableID] = append(tblID2neededColIDs[item.TableID], item.ID) + } + + // process them by table id + for tblID, colIDs := range tblID2neededColIDs { + tbl := tblID2Tbl[tblID] + if tbl == nil { + continue + } + // collect the needed columns on this table into a set for faster lookup + colNameSet := make(map[string]struct{}, len(colIDs)) + for _, colID := range colIDs { + name := tbl.Meta().FindColumnNameByID(colID) + if name == "" { + continue + } + colNameSet[name] = struct{}{} + } + // iterate columns in this table, and collect the virtual columns that depend on the needed columns + for _, col := range tbl.Cols() { + // only handles virtual columns + if !col.IsVirtualGenerated() { + continue + } + // If this column is already needed, then skip it. + if _, ok := colNameSet[col.Name.L]; ok { + continue + } + // If there exists a needed column that is depended on by this virtual column, + // then we think this virtual column is needed. + for depCol := range col.Dependences { + if _, ok := colNameSet[depCol]; ok { + generatedCols = append(generatedCols, model.TableItemID{TableID: tblID, ID: col.ID, IsIndex: false}) + break + } + } + } + } + return generatedCols +} + // collectSyncIndices will collect the indices which includes following conditions: // 1. the indices contained the any one of histNeededColumns, eg: histNeededColumns contained A,B columns, and idx_a is // composed up by A column, then we thought the idx_a should be collected // 2. 
The stats condition of idx_a can't meet IsFullLoad, which means its stats was evicted previously -func collectSyncIndices(ctx sessionctx.Context, histNeededColumns []model.TableItemID) map[model.TableItemID]struct{} { +func collectSyncIndices(ctx sessionctx.Context, + histNeededColumns []model.TableItemID, + tblID2Tbl map[int64]table.Table, +) map[model.TableItemID]struct{} { histNeededIndices := make(map[model.TableItemID]struct{}) stats := domain.GetDomain(ctx).StatsHandle() for _, column := range histNeededColumns { if column.IsIndex { continue } - is := ctx.GetDomainInfoSchema().(infoschema.InfoSchema) - tbl, ok := is.TableByID(column.TableID) - if !ok { - tbl, _, _ = is.FindTableByPartitionID(column.TableID) - if tbl == nil { - continue - } + tbl := tblID2Tbl[column.TableID] + if tbl == nil { + continue } colName := tbl.Meta().FindColumnNameByID(column.ID) if colName == "" { diff --git a/planner/core/stats.go b/planner/core/stats.go index db2488c183d4e..4312f181c7a33 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tidb/domain" "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/infoschema" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" @@ -225,10 +226,7 @@ func getTblInfoForUsedStatsByPhysicalID(sctx sessionctx.Context, id int64) (full var tbl table.Table var partDef *model.PartitionDefinition - tbl, ok := is.TableByID(id) - if !ok { - tbl, _, partDef = is.FindTableByPartitionID(id) - } + tbl, partDef = infoschema.FindTableByTblOrPartID(is, id) if tbl == nil || tbl.Meta() == nil { return }