Skip to content

Commit

Permalink
*: support use like to build range for new collation columns (#51164)
Browse files Browse the repository at this point in the history
close #48181
  • Loading branch information
time-and-fate authored Feb 20, 2024
1 parent af141d5 commit 1dae341
Show file tree
Hide file tree
Showing 22 changed files with 1,678 additions and 229 deletions.
6 changes: 4 additions & 2 deletions pkg/executor/point_get.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,11 +540,13 @@ func EncodeUniqueIndexValuesForKey(ctx sessionctx.Context, tblInfo *model.TableI
colInfo := tblInfo.Columns[idxInfo.Columns[i].Offset]
// table.CastValue will append 0x0 if the string value's length is smaller than the BINARY column's length.
// So we don't use CastValue for string value for now.
// TODO: merge two if branch.
// TODO: The first if branch should have been removed, because the functionality of set the collation of the datum
// have been moved to util/ranger (normal path) and getNameValuePairs/getPointGetValue (fast path). But this change
// will be cherry-picked to a hotfix, so we choose to be a bit conservative and keep this for now.
if colInfo.GetType() == mysql.TypeString || colInfo.GetType() == mysql.TypeVarString || colInfo.GetType() == mysql.TypeVarchar {
var str string
str, err = idxVals[i].ToString()
idxVals[i].SetString(str, colInfo.FieldType.GetCollate())
idxVals[i].SetString(str, idxVals[i].Collation())
} else if colInfo.GetType() == mysql.TypeEnum && (idxVals[i].Kind() == types.KindString || idxVals[i].Kind() == types.KindBytes || idxVals[i].Kind() == types.KindBinaryLiteral) {
var str string
var e types.Enum
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/casetest/index/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ func TestIndexMergeSingleCaseCouldFeelIndexMergeHint(t *testing.T) {
tk.MustQuery("explain format=\"brief\" SELECT /*+ use_index_merge(t, nslc) */ * FROM t WHERE 57260686 member of (fpi) AND \"OC8p1763XTkt.org/s/link\" member of (nslc) LIMIT 1;").Check(
testkit.Rows("Limit 1.00 root offset:0, count:1",
"└─IndexMerge 1.00 root type: union",
" ├─IndexRangeScan(Build) 1.00 cop[tikv] table:t, index:nslc(cast(`nslc` as char(1000) array), point_of_sale_country) range:[0x4F4338703137363358546B742E6F72672F732F6C696E6B,0x4F4338703137363358546B742E6F72672F732F6C696E6B], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 1.00 cop[tikv] table:t, index:nslc(cast(`nslc` as char(1000) array), point_of_sale_country) range:[\"OC8p1763XTkt.org/s/link\",\"OC8p1763XTkt.org/s/link\"], keep order:false, stats:pseudo",
" └─Limit(Probe) 1.00 cop[tikv] offset:0, count:1",
" └─Selection 1.00 cop[tikv] json_memberof(cast(57260686, json BINARY), test.t.fpi)",
" └─TableRowIDScan 1.00 cop[tikv] table:t keep order:false, stats:pseudo"))
Expand Down
12 changes: 6 additions & 6 deletions pkg/planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2230,14 +2230,14 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.MustExec("drop table if exists t1, t2")
tk.MustExec("create table t1(a int, b varchar(10), c varchar(10), index idx_a_b(a, b))")
tk.MustExec("create table t2(d int)")
tk.MustExec("set @@tidb_opt_range_max_size=1275")
// 1275 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
tk.MustExec("set @@tidb_opt_range_max_size=1260")
// 1260 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
rows := tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('a', 'b', 'c')").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, a, b, c)]"))
tk.MustQuery("show warnings").Check(testkit.Rows())
rows = tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('aaaaaa', 'bbbbbb', 'cccccc');").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]"))
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]")
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))

tk.MustExec("prepare stmt1 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c'")
Expand All @@ -2252,13 +2252,13 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.Session().SetSessionManager(&testkit.MockSessionManager{PS: ps})
rows = tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Rows()
// We don't limit range mem usage when rebuilding index join ranges for the cached plan. So [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc] can be built.
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]"))
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]")

// Test the plan with range fallback would not be put into cache.
tk.MustExec("prepare stmt2 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c', @d='d', @e='e'")
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
"Warning 1105 skip prepared plan-cache: in-list is too long"))
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("0"))
Expand Down
15 changes: 14 additions & 1 deletion pkg/planner/core/point_get_plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -1438,7 +1438,15 @@ func getNameValuePairs(ctx sessionctx.Context, tbl *model.TableInfo, tblName mod
col := model.FindColumnInfo(tbl.Cols(), colName.Name.Name.L)
if col == nil { // Handling the case when the column is _tidb_rowid.
return append(nvPairs, nameValuePair{colName: colName.Name.Name.L, colFieldType: types.NewFieldType(mysql.TypeLonglong), value: d, con: con}), false
} else if col.GetType() == mysql.TypeString && col.GetCollate() == charset.CollationBin { // This type we needn't to pad `\0` in here.
}

// As in buildFromBinOp in util/ranger, when we build key from the expression to do range scan or point get on
// a string column, we should set the collation of the string datum to collation of the column.
if col.FieldType.EvalType() == types.ETString && (d.Kind() == types.KindString || d.Kind() == types.KindBinaryLiteral) {
d.SetString(d.GetString(), col.FieldType.GetCollate())
}

if col.GetType() == mysql.TypeString && col.GetCollate() == charset.CollationBin { // This type we needn't to pad `\0` in here.
return append(nvPairs, nameValuePair{colName: colName.Name.Name.L, colFieldType: &col.FieldType, value: d, con: con}), false
}
if !checkCanConvertInPointGet(col, d) {
Expand Down Expand Up @@ -1468,6 +1476,11 @@ func getPointGetValue(stmtCtx *stmtctx.StatementContext, col *model.ColumnInfo,
if !checkCanConvertInPointGet(col, *d) {
return nil
}
// As in buildFromBinOp in util/ranger, when we build key from the expression to do range scan or point get on
// a string column, we should set the collation of the string datum to collation of the column.
if col.FieldType.EvalType() == types.ETString && (d.Kind() == types.KindString || d.Kind() == types.KindBinaryLiteral) {
d.SetString(d.GetString(), col.FieldType.GetCollate())
}
dVal, err := d.ConvertTo(stmtCtx, &col.FieldType)
if err != nil {
return nil
Expand Down
19 changes: 10 additions & 9 deletions pkg/planner/core/testdata/index_merge_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is1(s1) range:[\"Abc\",\"Abc\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t5, index:is2(s2) range:(\"zzz\",+inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"B啊a\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CcC\",\"CcC\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"\\x0eJ\\xfb@\\xd5J\\x0e3\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CCC\",\"CCC\"], keep order:false, stats:pseudo",
"└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t5 keep order:false, stats:pseudo"
],
"Result": [
Expand All @@ -144,7 +144,7 @@
"Plan": [
"IndexMerge 0.03 root type: intersection",
"├─IndexRangeScan(Build) 33.33 cop[tikv] table:t6, index:PRIMARY(s1, s2) range:(\"Abc\" \"zzz\",\"Abc\" +inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"A啊a\",\"A啊a\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"\\x0e3\\xfb@\\xd5J\\x0e3\",\"\\x0e3\\xfb@\\xd5J\\x0e3\"], keep order:false, stats:pseudo",
"└─Selection(Probe) 0.03 cop[tikv] gt(test.t6.s2, \"zzz\"), not(like(test.t6.s4, \"Cd_\", 92))",
" └─TableRowIDScan 0.03 cop[tikv] table:t6 keep order:false, stats:pseudo"
],
Expand Down Expand Up @@ -172,13 +172,14 @@
{
"SQL": "select /*+ use_index_merge(t8, primary,is2,is3,is4,is5) */ * from t8 where s1 like '啊A%' and s2 > 'abc' and s3 > 'cba' and s4 in ('aA', '??') and s5 = 'test,2'",
"Plan": [
"Selection 1.42 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.59 root type: intersection",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(0x616263,+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(0x636261,+inf], keep order:false, stats:pseudo",
"Selection 0.04 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.06 root type: intersection",
" ├─IndexRangeScan(Build) 250.00 cop[tikv] table:t8, index:PRIMARY(s1) range:[\"UJ\\x00A\",\"UJ\\x00B\"), keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(\"abc\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(\"cba\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 20.00 cop[tikv] table:t8, index:is4(s4) range:[\"aA\",\"aA\"], [\"??\",\"??\"], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.59 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 2.22 cop[tikv] table:t8 keep order:false, stats:pseudo"
" └─Selection(Probe) 0.06 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 0.06 cop[tikv] table:t8 keep order:false, stats:pseudo"
],
"Result": [
"啊aabbccdd abcc cccc aA tEsT,2"
Expand Down
1 change: 1 addition & 0 deletions pkg/util/ranger/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ go_library(
"//pkg/util/codec",
"//pkg/util/collate",
"//pkg/util/dbterror",
"//pkg/util/hack",
"//pkg/util/mathutil",
"@com_github_pingcap_errors//:errors",
],
Expand Down
12 changes: 2 additions & 10 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/collate"
)

// conditionChecker checks if this condition can be pushed to index planner.
type conditionChecker struct {
ctx sessionctx.Context
checkerCol *expression.Column
length int
optPrefixIndexSingleScan bool
Expand Down Expand Up @@ -139,16 +141,6 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction

func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) {
_, collation := scalar.CharsetAndCollation()
if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) {
// The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte.
// However, this is incorrect for non-binary collation strings because the sort key order is not the same as byte order.
// For example, "`%" is mapped to the range [`, a](where ` is 0x60 and a is 0x61).
// Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key.
// Finally, the range comes to be [`, A], which is actually an empty range.
// See https://github.com/pingcap/tidb/issues/31174 for more details.
// In short, when the column type is non-binary collation string, we cannot use `like` expressions to generate the range.
return false, true
}
if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) {
return false, true
}
Expand Down
Loading

0 comments on commit 1dae341

Please sign in to comment.