From bef241496c13d9241fd687df0784391449fc34c9 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Sat, 23 Dec 2023 07:58:55 +0800 Subject: [PATCH] planner: plan cache supports Batch/PointGet converted from (primary keys) in ((...), ...) (#44838) (#49735) close pingcap/tidb#44830 --- planner/core/find_best_task.go | 22 +------- planner/core/plan_cache.go | 4 +- planner/core/plan_cache_test.go | 44 ++++++++++++++++ planner/core/plan_cache_utils.go | 89 ++++++++++++++++++++++++++++++++ sessionctx/variable/session.go | 3 ++ 5 files changed, 139 insertions(+), 23 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 973f36fbc6ab3..630c3ed1c9eb7 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1040,7 +1040,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter // Batch/PointGet plans may be over-optimized, like `a>=1(?) and a<=1(?)` --> `a=1` --> PointGet(a=1). // For safety, prevent these plans from the plan cache here. - if !pointGetTask.invalid() && expression.MaybeOverOptimized4PlanCache(ds.ctx, candidate.path.AccessConds) && !ds.isSafePointGetPlan4PlanCache(candidate.path) { + if !pointGetTask.invalid() && expression.MaybeOverOptimized4PlanCache(ds.ctx, candidate.path.AccessConds) && !isSafePointGetPath4PlanCache(ds.ctx, candidate.path) { ds.ctx.GetSessionVars().StmtCtx.SetSkipPlanCache(errors.New("Batch/PointGet plans may be over-optimized")) } @@ -1123,26 +1123,6 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter return } -func (ds *DataSource) isSafePointGetPlan4PlanCache(path *util.AccessPath) bool { - // PointGet might contain some over-optimized assumptions, like `a>=1 and a<=1` --> `a=1`, but - // these assumptions may be broken after parameters change. - - // safe scenario 1: each column corresponds to a single EQ, `a=1 and b=2 and c=3` --> `[1, 2, 3]` - if len(path.Ranges) > 0 && path.Ranges[0].Width() == len(path.AccessConds) { - for _, accessCond := range path.AccessConds { - f, ok := accessCond.(*expression.ScalarFunction) - if !ok { - return false - } - if f.FuncName.L != ast.EQ { - return false - } - } - return true - } - return false -} - func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, candidate *candidatePath, _ *physicalOptimizeOp) (task task, err error) { if prop.IsFlashProp() || prop.TaskTp == property.CopSingleReadTaskType || !prop.IsSortItemEmpty() { return invalidTask, nil diff --git a/planner/core/plan_cache.go b/planner/core/plan_cache.go index b6e5c3824781b..9ca1c7c1247c0 100644 --- a/planner/core/plan_cache.go +++ b/planner/core/plan_cache.go @@ -429,7 +429,7 @@ func rebuildRange(p Plan) error { if err != nil { return err } - if len(ranges.Ranges) == 0 || len(ranges.AccessConds) != len(x.AccessConditions) { + if len(ranges.Ranges) != 1 || len(ranges.AccessConds) != len(x.AccessConditions) { return errors.New("failed to rebuild range: the length of the range has changed") } for i := range x.IndexValues { @@ -447,7 +447,7 @@ func rebuildRange(p Plan) error { if err != nil { return err } - if len(ranges) == 0 { + if len(ranges) != 1 { return errors.New("failed to rebuild range: the length of the range has changed") } x.Handle = kv.IntHandle(ranges[0].LowVal[0].GetInt64()) diff --git a/planner/core/plan_cache_test.go b/planner/core/plan_cache_test.go index 2aa0a6c021387..857e146f115a1 100644 --- a/planner/core/plan_cache_test.go +++ b/planner/core/plan_cache_test.go @@ -529,3 +529,47 @@ func TestIssue42150(t *testing.T) { tk.MustExec("execute st") tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("1")) } + +func TestIssue44830(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec(`use test`) + tk.MustExec(`set @@tidb_opt_fix_control = "44830:ON"`) + tk.MustExec(`create table t (a int, primary key(a))`) + tk.MustExec(`create table t1 (a int, b int, primary key(a, b))`) // multiple-column primary key + tk.MustExec(`insert into t values (1), (2), (3)`) + tk.MustExec(`insert into t1 values (1, 1), (2, 2), (3, 3)`) + tk.MustExec(`set @a=1, @b=2, @c=3`) + + // single-column primary key cases + tk.MustExec(`prepare st from 'select * from t where 1=1 and a in (?, ?, ?)'`) + tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3")) + tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("1")) + tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed + tk.MustQuery(`execute st using @b, @b, @b`).Sort().Check(testkit.Rows("2")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed + tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3")) + tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2")) + tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list + + // multi-column primary key cases + tk.MustExec(`prepare st from 'select * from t1 where 1=1 and (a, b) in ((?, ?), (?, ?), (?, ?))'`) + tk.MustQuery(`execute st using @a, @a, @b, @b, @c, @c`).Sort().Check(testkit.Rows("1 1", "2 2", "3 3")) + tk.MustQuery(`execute st using @a, @a, @b, @b, @c, @c`).Sort().Check(testkit.Rows("1 1", "2 2", "3 3")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("1")) + tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed + tk.MustQuery(`execute st using @b, @b, @b, @b, @b, @b`).Sort().Check(testkit.Rows("2 2")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed + tk.MustQuery(`execute st using @b, @b, @b, @b, @c, @c`).Sort().Check(testkit.Rows("2 2", "3 3")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed + tk.MustQuery(`execute st using @a, @a, @a, @a, @a, @a`).Sort().Check(testkit.Rows("1 1")) + tk.MustQuery(`execute st using @a, @a, @a, @a, @a, @a`).Sort().Check(testkit.Rows("1 1")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list + tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2")) + tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list +} diff --git a/planner/core/plan_cache_utils.go b/planner/core/plan_cache_utils.go index 5431f7ef71c27..4c2c29dddfd69 100644 --- a/planner/core/plan_cache_utils.go +++ b/planner/core/plan_cache_utils.go @@ -22,11 +22,13 @@ import ( "unsafe" "github.com/pingcap/errors" + "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/parser" "github.com/pingcap/tidb/parser/ast" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/types" @@ -454,3 +456,90 @@ func GetPreparedStmt(stmt *ast.ExecuteStmt, vars *variable.SessionVars) (*PlanCa } return nil, ErrStmtNotFound } + +func isSafePointGetPath4PlanCache(sctx sessionctx.Context, path *util.AccessPath) bool { + // PointGet might contain some over-optimized assumptions, like `a>=1 and a<=1` --> `a=1`, but + // these assumptions may be broken after parameters change. + + if isSafePointGetPath4PlanCacheScenario1(path) { + return true + } + + // TODO: enable this fix control switch by default after more test cases are added. + if sctx != nil && sctx.GetSessionVars() != nil && sctx.GetSessionVars().OptimizerFixControl != nil { + v, ok := sctx.GetSessionVars().OptimizerFixControl[variable.TiDBOptFixControl44830] + if ok && variable.TiDBOptOn(v) && (isSafePointGetPath4PlanCacheScenario2(path) || isSafePointGetPath4PlanCacheScenario3(path)) { + return true + } + } + + return false +} + +func isSafePointGetPath4PlanCacheScenario1(path *util.AccessPath) bool { + // safe scenario 1: each column corresponds to a single EQ, `a=1 and b=2 and c=3` --> `[1, 2, 3]` + if len(path.Ranges) <= 0 || path.Ranges[0].Width() != len(path.AccessConds) { + return false + } + for _, accessCond := range path.AccessConds { + f, ok := accessCond.(*expression.ScalarFunction) + if !ok || f.FuncName.L != ast.EQ { // column = constant + return false + } + } + return true +} + +func isSafePointGetPath4PlanCacheScenario2(path *util.AccessPath) bool { + // safe scenario 2: this Batch or PointGet is simply from a single IN predicate, `key in (...)` + if len(path.Ranges) <= 0 || len(path.AccessConds) != 1 { + return false + } + f, ok := path.AccessConds[0].(*expression.ScalarFunction) + if !ok || f.FuncName.L != ast.In { + return false + } + return len(path.Ranges) == len(f.GetArgs())-1 // no duplicated values in this in-list for safety. +} + +func isSafePointGetPath4PlanCacheScenario3(path *util.AccessPath) bool { + // safe scenario 3: this Batch or PointGet is simply from a simple DNF like `key=? or key=? or key=?` + if len(path.Ranges) <= 0 || len(path.AccessConds) != 1 { + return false + } + f, ok := path.AccessConds[0].(*expression.ScalarFunction) + if !ok || f.FuncName.L != ast.LogicOr { + return false + } + + dnfExprs := expression.FlattenDNFConditions(f) + if len(path.Ranges) != len(dnfExprs) { + // no duplicated values in this in-list for safety. + // e.g. `k=1 or k=2 or k=1` --> [[1, 1], [2, 2]] + return false + } + + for _, expr := range dnfExprs { + f, ok := expr.(*expression.ScalarFunction) + if !ok { + return false + } + switch f.FuncName.L { + case ast.EQ: // (k=1 or k=2) --> [k=1, k=2] + case ast.LogicAnd: // ((k1=1 and k2=1) or (k1=2 and k2=2)) --> [k1=1 and k2=1, k2=2 and k2=2] + cnfExprs := expression.FlattenCNFConditions(f) + if path.Ranges[0].Width() != len(cnfExprs) { // not all key columns are specified + return false + } + for _, expr := range cnfExprs { // k1=1 and k2=1 + f, ok := expr.(*expression.ScalarFunction) + if !ok || f.FuncName.L != ast.EQ { + return false + } + } + default: + return false + } + } + return true +} diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 16f630cde85ef..c9647fab0e903 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -1337,6 +1337,9 @@ var ( TiDBOptFixControl44262 uint64 = 44262 // TiDBOptFixControl44389 controls whether to consider non-point ranges of some CNF item when building ranges. TiDBOptFixControl44389 uint64 = 44389 + // TiDBOptFixControl44830 controls whether to allow to cache Batch/PointGet from some complex scenarios. + // See #44830 for more details. + TiDBOptFixControl44830 uint64 = 44830 ) // GetOptimizerFixControlValue returns the specified value of the optimizer fix control.