diff --git a/cmd/explaintest/r/planner_issue.result b/cmd/explaintest/r/planner_issue.result new file mode 100644 index 0000000000000..9bf55a7187a37 --- /dev/null +++ b/cmd/explaintest/r/planner_issue.result @@ -0,0 +1,29 @@ +drop table if exists t1, t2; +create table t1(a varchar(20) collate utf8mb4_bin, index ia(a)); +insert into t1 value('测试'),('测试 '),('xxx '); +explain format = brief select *,length(a) from t1 where a like '测试 %'; +id estRows task access object operator info +Projection 250.00 root test.t1.a, length(test.t1.a)->Column#3 +└─IndexReader 250.00 root index:IndexRangeScan + └─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo +explain format = brief select *,length(a) from t1 where a like '测试'; +id estRows task access object operator info +Projection 10.00 root test.t1.a, length(test.t1.a)->Column#3 +└─IndexReader 10.00 root index:IndexRangeScan + └─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo +select *,length(a) from t1 where a like '测试 %'; +a length(a) +测试 6 +测试 8 +select *,length(a) from t1 where a like '测试'; +a length(a) +测试 6 +测试 8 +explain format = brief select * from t1 use index (ia) where a like 'xxx_'; +id estRows task access object operator info +IndexReader 250.00 root index:Selection +└─Selection 250.00 cop[tikv] like(test.t1.a, "xxx_", 92) + └─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo +select * from t1 use index (ia) where a like 'xxx_'; +a +xxx diff --git a/cmd/explaintest/t/planner_issue.test b/cmd/explaintest/t/planner_issue.test new file mode 100644 index 0000000000000..8f42e7e872937 --- /dev/null +++ b/cmd/explaintest/t/planner_issue.test @@ -0,0 +1,10 @@ +# https://github.com/pingcap/tidb/issues/48983 +drop table if exists t1, t2; +create table t1(a varchar(20) collate utf8mb4_bin, index ia(a)); +insert into t1 value('测试'),('测试 '),('xxx '); +explain format = brief select *,length(a) from t1 where a like '测试 %'; +explain format = brief select *,length(a) from t1 where a like '测试'; +select *,length(a) from t1 where a like '测试 %'; +select *,length(a) from t1 where a like '测试'; +explain format = brief select * from t1 use index (ia) where a like 'xxx_'; +select * from t1 use index (ia) where a like 'xxx_'; diff --git a/planner/core/casetest/testdata/plan_suite_out.json b/planner/core/casetest/testdata/plan_suite_out.json index a358f73dcbeb4..3a6ab8d934db6 100644 --- a/planner/core/casetest/testdata/plan_suite_out.json +++ b/planner/core/casetest/testdata/plan_suite_out.json @@ -2811,7 +2811,7 @@ }, { "SQL": "select a from t where c_str like 'abc_'", - "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc%af'", @@ -2839,7 +2839,7 @@ }, { "SQL": "select a from t where c_str like 'abc\\__'", - "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 123", diff --git a/util/ranger/points.go b/util/ranger/points.go index e5061caa43446..6bf08c3c0f510 100644 --- a/util/ranger/points.go +++ b/util/ranger/points.go @@ -23,6 +23,7 @@ import ( "github.com/pingcap/tidb/errno" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/parser/ast" + "github.com/pingcap/tidb/parser/charset" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" @@ -677,9 +678,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po break } else if pattern[i] == '_' { // Get the prefix, but exclude the prefix. - // e.g., "abc_x", the start point exclude "abc", - // because the string length is more than 3. - exclude = true + // e.g., "abc_x", the start point excludes "abc" because the string length is more than 3. + // + // However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for + // PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish + // 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation, + // we will actually miss 'xxx ', which will cause wrong results. + if !isPadSpaceCollation(collation) { + exclude = true + } isExactMatch = false break } @@ -714,6 +721,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po return []*point{startPoint, endPoint} } +// isPadSpaceCollation returns whether the collation is a PAD SPACE collation. +// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple +// collation != binary check here. We may also move it to collation related packages when NO PAD collations are +// implemented in the future. +func isPadSpaceCollation(collation string) bool { + return collation != charset.CollationBin +} + func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point { switch n := expr.FuncName.L; n { case ast.IsTruthWithoutNull: diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index db54c134b6cfd..48e6378548c68 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -1424,7 +1424,7 @@ create table t( exprStr: "a LIKE 'abc_'", accessConds: "[like(test.t.a, abc_, 92)]", filterConds: "[like(test.t.a, abc_, 92)]", - resultStr: "[(\"abc\",\"abd\")]", + resultStr: "[[\"abc\",\"abd\")]", }, { indexPos: 0,