pingcap · ti-chi-bot · Dec 8, 2023 · Nov 30, 2023 · Dec 5, 2023 · Dec 5, 2023
diff --git a/cmd/explaintest/r/planner_issue.result b/cmd/explaintest/r/planner_issue.result
@@ -0,0 +1,29 @@
+drop table if exists t1, t2;
+create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
+insert into t1 value('测试'),('测试  '),('xxx ');
+explain format = brief select *,length(a) from t1 where a like '测试 %';
+id	estRows	task	access object	operator info
+Projection	250.00	root		test.t1.a, length(test.t1.a)->Column#3
+└─IndexReader	250.00	root		index:IndexRangeScan
+  └─IndexRangeScan	250.00	cop[tikv]	table:t1, index:ia(a)	range:["测试 ","测试!"), keep order:false, stats:pseudo
+explain format = brief select *,length(a) from t1 where a like '测试';
+id	estRows	task	access object	operator info
+Projection	10.00	root		test.t1.a, length(test.t1.a)->Column#3
+└─IndexReader	10.00	root		index:IndexRangeScan
+  └─IndexRangeScan	10.00	cop[tikv]	table:t1, index:ia(a)	range:["测试","测试"], keep order:false, stats:pseudo
+select *,length(a) from t1 where a like '测试 %';
+a	length(a)
+测试	6
+测试  	8
+select *,length(a) from t1 where a like '测试';
+a	length(a)
+测试	6
+测试  	8
+explain format = brief select * from t1 use index (ia) where a like 'xxx_';
+id	estRows	task	access object	operator info
+IndexReader	250.00	root		index:Selection
+└─Selection	250.00	cop[tikv]		like(test.t1.a, "xxx_", 92)
+  └─IndexRangeScan	250.00	cop[tikv]	table:t1, index:ia(a)	range:["xxx","xxy"), keep order:false, stats:pseudo
+select * from t1 use index (ia) where a like 'xxx_';
+a
+xxx 
diff --git a/cmd/explaintest/t/planner_issue.test b/cmd/explaintest/t/planner_issue.test
@@ -0,0 +1,10 @@
+# https://github.com/pingcap/tidb/issues/48983
+drop table if exists t1, t2;
+create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
+insert into t1 value('测试'),('测试  '),('xxx ');
+explain format = brief select *,length(a) from t1 where a like '测试 %';
+explain format = brief select *,length(a) from t1 where a like '测试';
+select *,length(a) from t1 where a like '测试 %';
+select *,length(a) from t1 where a like '测试';
+explain format = brief select * from t1 use index (ia) where a like 'xxx_';
+select * from t1 use index (ia) where a like 'xxx_';
diff --git a/planner/core/testdata/plan_suite_out.json b/planner/core/testdata/plan_suite_out.json
@@ -1188,7 +1188,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc_'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc%af'",
@@ -1216,7 +1216,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc\\__'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 123",

diff --git a/util/ranger/points.go b/util/ranger/points.go
@@ -23,6 +23,7 @@ import (
 	"github.com/pingcap/tidb/errno"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/parser/ast"
+	"github.com/pingcap/tidb/parser/charset"
 	"github.com/pingcap/tidb/parser/mysql"
 	"github.com/pingcap/tidb/sessionctx/stmtctx"
 	"github.com/pingcap/tidb/types"
@@ -677,9 +678,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
 			break
 		} else if pattern[i] == '_' {
 			// Get the prefix, but exclude the prefix.
-			// e.g., "abc_x", the start point exclude "abc",
-			// because the string length is more than 3.
-			exclude = true
+			// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
+			//
+			// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
+			// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
+			// 'xxx' from 'xxx   ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
+			// we will actually miss 'xxx   ', which will cause wrong results.
+			if !isPadSpaceCollation(collation) {
+				exclude = true
+			}
 			isExactMatch = false
 			break
 		}
@@ -714,6 +721,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
 	return []*point{startPoint, endPoint}
 }
 
+// isPadSpaceCollation reports whether the given collation is treated as a PAD SPACE collation.
+// Every collation implemented in tidb other than binary is PAD SPACE for now, so a plain comparison against
+// charset.CollationBin is sufficient. This helper may move to a collation-related package once NO PAD
+// collations are implemented.
+func isPadSpaceCollation(collation string) bool {
+	return collation != charset.CollationBin
+}
+
 func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point {
 	switch n := expr.FuncName.L; n {
 	case ast.IsTruthWithoutNull:

diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go
@@ -1424,7 +1424,7 @@ create table t(
 			exprStr:     "a LIKE 'abc_'",
 			accessConds: "[like(test.t.a, abc_, 92)]",
 			filterConds: "[like(test.t.a, abc_, 92)]",
-			resultStr:   "[(\"abc\",\"abd\")]",
+			resultStr:   "[[\"abc\",\"abd\")]",
 		},
 		{
 			indexPos:    0,