planner,expression,util: scatter hotspot index in certain scenes by expression index (#30659) close #31040
pingcap · Feb 18, 2022 · 6517f3b · 6517f3b
1 parent c102b6b
commit 6517f3b
Show file tree

Hide file tree

Showing 18 changed files with 1,223 additions and 20 deletions.
diff --git a/cmd/explaintest/r/explain_generate_column_substitute.result b/cmd/explaintest/r/explain_generate_column_substitute.result
@@ -523,7 +523,7 @@ a
 b
 select @@tidb_allow_function_for_expression_index;
 @@tidb_allow_function_for_expression_index
-lower, md5, reverse, upper, vitess_hash
+lower, md5, reverse, tidb_shard, upper, vitess_hash
 CREATE TABLE `PK_S_MULTI_30_tmp` (
 `COL1` double NOT NULL,
 `COL2` double NOT NULL,

diff --git a/cmd/explaintest/r/explain_shard_index.result b/cmd/explaintest/r/explain_shard_index.result
@@ -0,0 +1,73 @@
+use test;
+drop table if exists test3, test5;
+create table test3(id int primary key clustered, a int, b int, unique key uk_expr((tidb_shard(a)),a));
+create table test5(id int primary key clustered, a int, b int, unique key uk_expr((tidb_shard(a)),a,b));
+explain format=brief select * from test3 where a=100;
+id	estRows	task	access object	operator info
+Projection	1.00	root		test.test3.id, test.test3.a, test.test3.b
+└─Point_Get	1.00	root	table:test3, index:uk_expr(tidb_shard(`a`), a)	
+explain format=brief select * from test3 where a=100 and (b = 100 or b = 200);
+id	estRows	task	access object	operator info
+Projection	0.00	root		test.test3.id, test.test3.a, test.test3.b
+└─Selection	0.00	root		or(eq(test.test3.b, 100), eq(test.test3.b, 200))
+  └─Point_Get	1.00	root	table:test3, index:uk_expr(tidb_shard(`a`), a)	
+explain format=brief select * from test3 where tidb_shard(a) = 8;
+id	estRows	task	access object	operator info
+Projection	10.00	root		test.test3.id, test.test3.a, test.test3.b
+└─IndexLookUp	10.00	root		
+  ├─IndexRangeScan(Build)	10.00	cop[tikv]	table:test3, index:uk_expr(tidb_shard(`a`), a)	range:[8,8], keep order:false, stats:pseudo
+  └─TableRowIDScan(Probe)	10.00	cop[tikv]	table:test3	keep order:false, stats:pseudo
+explain format=brief select * from test3 where a=100 or b = 200;
+id	estRows	task	access object	operator info
+Projection	8000.00	root		test.test3.id, test.test3.a, test.test3.b
+└─Selection	8000.00	root		or(and(eq(tidb_shard(test.test3.a), 8), eq(test.test3.a, 100)), eq(test.test3.b, 200))
+  └─TableReader	10000.00	root		data:TableFullScan
+    └─TableFullScan	10000.00	cop[tikv]	table:test3	keep order:false, stats:pseudo
+explain format=brief select * from test3 where a=100 or a = 300;
+id	estRows	task	access object	operator info
+Projection	2.00	root		test.test3.id, test.test3.a, test.test3.b
+└─Batch_Point_Get	2.00	root	table:test3, index:uk_expr(tidb_shard(`a`), a)	keep order:false, desc:false
+explain format=brief select * from test3 where a=100 or a = 300 or a > 997;
+id	estRows	task	access object	operator info
+Projection	8000.00	root		test.test3.id, test.test3.a, test.test3.b
+└─Selection	8000.00	root		or(and(eq(tidb_shard(test.test3.a), 8), eq(test.test3.a, 100)), or(and(eq(tidb_shard(test.test3.a), 227), eq(test.test3.a, 300)), gt(test.test3.a, 997)))
+  └─TableReader	10000.00	root		data:TableFullScan
+    └─TableFullScan	10000.00	cop[tikv]	table:test3	keep order:false, stats:pseudo
+explain format=brief select * from test3 where ((a=100 and b = 100) or a = 200) and b = 300;
+id	estRows	task	access object	operator info
+Projection	0.01	root		test.test3.id, test.test3.a, test.test3.b
+└─TableReader	0.01	root		data:Selection
+  └─Selection	0.01	cop[tikv]		eq(test.test3.b, 300), or(0, eq(test.test3.a, 200))
+    └─TableFullScan	10000.00	cop[tikv]	table:test3	keep order:false, stats:pseudo
+explain format=brief select * from test3 where a = b;
+id	estRows	task	access object	operator info
+Projection	8000.00	root		test.test3.id, test.test3.a, test.test3.b
+└─TableReader	8000.00	root		data:Selection
+  └─Selection	8000.00	cop[tikv]		eq(test.test3.a, test.test3.b)
+    └─TableFullScan	10000.00	cop[tikv]	table:test3	keep order:false, stats:pseudo
+explain format=brief select * from test3 where a = b and b = 100;
+id	estRows	task	access object	operator info
+Projection	0.00	root		test.test3.id, test.test3.a, test.test3.b
+└─Selection	0.00	root		eq(test.test3.b, 100)
+  └─Point_Get	1.00	root	table:test3, index:uk_expr(tidb_shard(`a`), a)	
+explain format=brief select * from test5 where a=100 and b = 100;
+id	estRows	task	access object	operator info
+Projection	1.00	root		test.test5.id, test.test5.a, test.test5.b
+└─Point_Get	1.00	root	table:test5, index:uk_expr(tidb_shard(`a`), a, b)	
+explain format=brief select * from test5 where (a=100 and b = 100) or  (a=200 and b = 200);
+id	estRows	task	access object	operator info
+Projection	2.00	root		test.test5.id, test.test5.a, test.test5.b
+└─Batch_Point_Get	2.00	root	table:test5, index:uk_expr(tidb_shard(`a`), a, b)	keep order:false, desc:false
+explain format=brief select a+b from test5 where (a, b) in ((100, 100), (200, 200));
+id	estRows	task	access object	operator info
+Projection	2.00	root		plus(test.test5.a, test.test5.b)->Column#5
+└─Batch_Point_Get	2.00	root	table:test5, index:uk_expr(tidb_shard(`a`), a, b)	keep order:false, desc:false
+explain format=brief SELECT * FROM test3 WHERE a IN (100);
+id	estRows	task	access object	operator info
+Projection	1.00	root		test.test3.id, test.test3.a, test.test3.b
+└─Point_Get	1.00	root	table:test3, index:uk_expr(tidb_shard(`a`), a)	
+explain format=brief SELECT * FROM test3 WHERE a IN (100, 200, 300);
+id	estRows	task	access object	operator info
+Projection	3.00	root		test.test3.id, test.test3.a, test.test3.b
+└─Batch_Point_Get	3.00	root	table:test3, index:uk_expr(tidb_shard(`a`), a)	keep order:false, desc:false
+drop table if exists test3, test5;
diff --git a/cmd/explaintest/t/explain_shard_index.test b/cmd/explaintest/t/explain_shard_index.test
@@ -0,0 +1,22 @@
+use test;
+drop table if exists test3, test5;
+create table test3(id int primary key clustered, a int, b int, unique key uk_expr((tidb_shard(a)),a));
+create table test5(id int primary key clustered, a int, b int, unique key uk_expr((tidb_shard(a)),a,b));
+
+explain format=brief select * from test3 where a=100;
+explain format=brief select * from test3 where a=100 and (b = 100 or b = 200);
+explain format=brief select * from test3 where tidb_shard(a) = 8;
+explain format=brief select * from test3 where a=100 or b = 200;
+explain format=brief select * from test3 where a=100 or a = 300;
+explain format=brief select * from test3 where a=100 or a = 300 or a > 997;
+explain format=brief select * from test3 where ((a=100 and b = 100) or a = 200) and b = 300;
+explain format=brief select * from test3 where a = b;
+explain format=brief select * from test3 where a = b and b = 100;
+explain format=brief select * from test5 where a=100 and b = 100;
+explain format=brief select * from test5 where (a=100 and b = 100) or  (a=200 and b = 200);
+explain format=brief select a+b from test5 where (a, b) in ((100, 100), (200, 200));
+explain format=brief SELECT * FROM test3 WHERE a IN (100);
+explain format=brief SELECT * FROM test3 WHERE a IN (100, 200, 300);
+
+drop table if exists test3, test5;
+
diff --git a/executor/show_test.go b/executor/show_test.go
@@ -1404,7 +1404,7 @@ func TestShowBuiltin(t *testing.T) {
 	res := tk.MustQuery("show builtins;")
 	require.NotNil(t, res)
 	rows := res.Rows()
-	const builtinFuncNum = 274
+	const builtinFuncNum = 275
 	require.Equal(t, len(rows), builtinFuncNum)
 	require.Equal(t, rows[0][0].(string), "abs")
 	require.Equal(t, rows[builtinFuncNum-1][0].(string), "yearweek")

diff --git a/expression/builtin.go b/expression/builtin.go
@@ -781,6 +781,7 @@ var funcs = map[string]functionClass{
 	ast.VitessHash:      &vitessHashFunctionClass{baseFunctionClass{ast.VitessHash, 1, 1}},
 	ast.UUIDToBin:       &uuidToBinFunctionClass{baseFunctionClass{ast.UUIDToBin, 1, 2}},
 	ast.BinToUUID:       &binToUUIDFunctionClass{baseFunctionClass{ast.BinToUUID, 1, 2}},
+	ast.TiDBShard:       &tidbShardFunctionClass{baseFunctionClass{ast.TiDBShard, 1, 1}},
 
 	// get_lock() and release_lock() are parsed but do nothing.
 	// It is used for preventing error in Ruby's activerecord migrations.

diff --git a/expression/builtin_miscellaneous.go b/expression/builtin_miscellaneous.go
@@ -58,6 +58,7 @@ var (
 	_ functionClass = &uuidToBinFunctionClass{}
 	_ functionClass = &binToUUIDFunctionClass{}
 	_ functionClass = &isUUIDFunctionClass{}
+	_ functionClass = &tidbShardFunctionClass{}
 )
 
 var (
@@ -92,6 +93,11 @@ var (
 	_ builtinFunc = &builtinNameConstDurationSig{}
 	_ builtinFunc = &builtinNameConstStringSig{}
 	_ builtinFunc = &builtinNameConstJSONSig{}
+	_ builtinFunc = &builtinTidbShardSig{}
+)
+
+const (
+	// tidbShardBucketCount is the modulus applied to the hash computed by
+	// tidb_shard(), so its results always fall in the range [0, 256).
+	tidbShardBucketCount = 256
+)
 
 type sleepFunctionClass struct {
@@ -1304,3 +1310,49 @@ func swapStringUUID(str string) string {
 	copy(buf[18:], str[18:])
 	return string(buf)
 }
+
+// tidbShardFunctionClass is the functionClass implementation for tidb_shard().
+type tidbShardFunctionClass struct {
+	baseFunctionClass
+}
+
+// getFunction verifies the arguments and builds the builtinTidbShardSig
+// signature. Both the argument and the result are evaluated as integers.
+func (c *tidbShardFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
+	err := c.verifyArgs(args)
+	if err != nil {
+		return nil, err
+	}
+
+	base, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETInt, types.ETInt)
+	if err != nil {
+		return nil, err
+	}
+
+	// The result is reported as an unsigned integer with a field length of 4.
+	base.tp.Flag |= mysql.UnsignedFlag
+	base.tp.Flen = 4
+	types.SetBinChsClnFlag(base.tp)
+
+	shardSig := &builtinTidbShardSig{baseBuiltinFunc: base}
+	shardSig.setPbCode(tipb.ScalarFuncSig_TiDBShard)
+	return shardSig, nil
+}
+
+// builtinTidbShardSig is the builtinFunc signature that evaluates tidb_shard().
+type builtinTidbShardSig struct {
+	baseBuiltinFunc
+}
+
+// Clone returns a new builtinTidbShardSig whose base state is copied from b.
+func (b *builtinTidbShardSig) Clone() builtinFunc {
+	cloned := new(builtinTidbShardSig)
+	cloned.cloneFrom(&b.baseBuiltinFunc)
+	return cloned
+}
+
+// evalInt evals tidb_shard(int64).
+func (b *builtinTidbShardSig) evalInt(row chunk.Row) (int64, bool, error) {
+	shardKeyInt, isNull, err := b.args[0].EvalInt(b.ctx, row)
+	if isNull || err != nil {
+		return 0, true, err
+	}
+	var hashed uint64
+	if hashed, err = vitess.HashUint64(uint64(shardKeyInt)); err != nil {
+		return 0, true, err
+	}
+	hashed = hashed % tidbShardBucketCount
+	return int64(hashed), false, nil
+}
diff --git a/expression/builtin_miscellaneous_test.go b/expression/builtin_miscellaneous_test.go
@@ -577,3 +577,37 @@ func TestBinToUUID(t *testing.T) {
 	_, err := funcs[ast.BinToUUID].getFunction(ctx, []Expression{NewZero()})
 	require.NoError(t, err)
 }
+
+// TestTidbShard checks tidb_shard() on integer and string constants and
+// verifies that passing more than one argument is rejected.
+func TestTidbShard(t *testing.T) {
+	ctx := createContext(t)
+	shardFn := funcs[ast.TiDBShard]
+
+	// Integer arguments: tidb_shard(-1) == 81, tidb_shard(0) == 167, and so on.
+	intArgs := makeDatums([]int{-1, 0, 1, 9999999999999999})
+	intWant := makeDatums([]int{81, 167, 214, 63})
+	for i := range intArgs {
+		sig, err := shardFn.getFunction(ctx, datumsToConstants([]types.Datum{intArgs[i]}))
+		require.NoError(t, err)
+		got, err := evalBuiltinFunc(sig, chunk.Row{})
+		require.NoError(t, err)
+		trequire.DatumEqual(t, intWant[i], got)
+	}
+
+	// String arguments always return 167, the same bucket as tidb_shard(0).
+	strArgs := makeDatums([]string{"abc", "ope", "wopddd"})
+	strWant := makeDatums([]int{167})[0]
+	for i := range strArgs {
+		sig, err := shardFn.getFunction(ctx, datumsToConstants([]types.Datum{strArgs[i]}))
+		require.NoError(t, err)
+		got, err := evalBuiltinFunc(sig, chunk.Row{})
+		require.NoError(t, err)
+		trequire.DatumEqual(t, strWant, got)
+	}
+
+	// Passing four arguments at once must fail when building the function.
+	tooMany := makeDatums([]int{-1, 0, 1, 9999999999999999})
+	_, err := shardFn.getFunction(ctx, datumsToConstants(tooMany))
+	require.Error(t, err)
+}
diff --git a/expression/column.go b/expression/column.go
@@ -20,6 +20,7 @@ import (
 	"strings"
 
 	"github.com/pingcap/errors"
+	"github.com/pingcap/tidb/parser/ast"
 	"github.com/pingcap/tidb/parser/charset"
 	"github.com/pingcap/tidb/parser/model"
 	"github.com/pingcap/tidb/parser/mysql"
@@ -552,8 +553,8 @@ func ColInfo2Col(cols []*Column, col *model.ColumnInfo) *Column {
 	return nil
 }
 
-// indexCol2Col finds the corresponding column of the IndexColumn in a column slice.
-func indexCol2Col(colInfos []*model.ColumnInfo, cols []*Column, col *model.IndexColumn) *Column {
+// IndexCol2Col finds the corresponding column of the IndexColumn in a column slice.
+func IndexCol2Col(colInfos []*model.ColumnInfo, cols []*Column, col *model.IndexColumn) *Column {
 	for i, info := range colInfos {
 		if info.Name.L == col.Name.L {
 			if col.Length > 0 && info.FieldType.Flen > col.Length {
@@ -576,7 +577,7 @@ func IndexInfo2PrefixCols(colInfos []*model.ColumnInfo, cols []*Column, index *m
 	retCols := make([]*Column, 0, len(index.Columns))
 	lengths := make([]int, 0, len(index.Columns))
 	for _, c := range index.Columns {
-		col := indexCol2Col(colInfos, cols, c)
+		col := IndexCol2Col(colInfos, cols, c)
 		if col == nil {
 			return retCols, lengths
 		}
@@ -598,7 +599,7 @@ func IndexInfo2Cols(colInfos []*model.ColumnInfo, cols []*Column, index *model.I
 	retCols := make([]*Column, 0, len(index.Columns))
 	lens := make([]int, 0, len(index.Columns))
 	for _, c := range index.Columns {
-		col := indexCol2Col(colInfos, cols, c)
+		col := IndexCol2Col(colInfos, cols, c)
 		if col == nil {
 			retCols = append(retCols, col)
 			lens = append(lens, types.UnspecifiedLength)
@@ -684,3 +685,31 @@ func SortColumns(cols []*Column) []*Column {
 	})
 	return sorted
 }
+
+// InColumnArray reports whether cols contains a column that col.Equal
+// considers equal to col. It returns false for a nil or empty slice.
+func (col *Column) InColumnArray(cols []*Column) bool {
+	for i := range cols {
+		if col.Equal(nil, cols[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+// GcColumnExprIsTidbShard reports whether virtualExpr is a scalar function
+// call to tidb_shard(). A nil expression, or any expression that is not a
+// *ScalarFunction, yields false.
+func GcColumnExprIsTidbShard(virtualExpr Expression) bool {
+	// The type assertion already fails for a nil interface value, so no
+	// separate nil check is required.
+	fn, isScalarFunc := virtualExpr.(*ScalarFunction)
+	return isScalarFunc && fn.FuncName.L == ast.TiDBShard
+}
diff --git a/expression/column_test.go b/expression/column_test.go
@@ -18,6 +18,7 @@ import (
 	"fmt"
 	"testing"
 
+	"github.com/pingcap/tidb/parser/ast"
 	"github.com/pingcap/tidb/parser/model"
 	"github.com/pingcap/tidb/parser/mysql"
 	"github.com/pingcap/tidb/types"
@@ -228,3 +229,37 @@ func TestColHybird(t *testing.T) {
 		require.Equal(t, result.GetString(i), v)
 	}
 }
+
+// TestInColumnArray covers the found, not-found and nil-slice cases of
+// Column.InColumnArray.
+func TestInColumnArray(t *testing.T) {
+	first := &Column{ID: 0, UniqueID: 0}
+	second := &Column{ID: 1, UniqueID: 1}
+
+	// The column is one of the elements of the slice.
+	require.True(t, first.InColumnArray([]*Column{first, second}))
+
+	// The slice holds only a different column.
+	require.False(t, first.InColumnArray([]*Column{second}))
+
+	// A nil slice contains nothing.
+	require.False(t, first.InColumnArray(nil))
+}
+
+// TestGcColumnExprIsTidbShard checks that only a tidb_shard() scalar function
+// is recognized by GcColumnExprIsTidbShard.
+func TestGcColumnExprIsTidbShard(t *testing.T) {
+	sctx := mock.NewContext()
+	longlongTp := types.NewFieldType(mysql.TypeLonglong)
+	colA := &Column{RetType: longlongTp, Index: 0}
+
+	// A nil expression is not tidb_shard.
+	require.False(t, GcColumnExprIsTidbShard(nil))
+
+	// `a = 1` is a scalar function, but not tidb_shard.
+	one := &Constant{Value: types.NewDatum(1), RetType: longlongTp}
+	eqExpr := NewFunctionInternal(sctx, ast.EQ, longlongTp, colA, one)
+	require.False(t, GcColumnExprIsTidbShard(eqExpr))
+
+	// `tidb_shard(a)` is recognized.
+	shardExpr := NewFunctionInternal(sctx, ast.TiDBShard, longlongTp, colA)
+	require.True(t, GcColumnExprIsTidbShard(shardExpr))
+}
diff --git a/parser/ast/functions.go b/parser/ast/functions.go
@@ -289,6 +289,7 @@ const (
 	UUIDToBin       = "uuid_to_bin"
 	BinToUUID       = "bin_to_uuid"
 	VitessHash      = "vitess_hash"
+	TiDBShard       = "tidb_shard"
 	// get_lock() and release_lock() is parsed but do nothing.
 	// It is used for preventing error in Ruby's activerecord migrations.
 	GetLock     = "get_lock"