-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
*: split index region with lower upper syntax #10409
Changes from all commits
e67ecf4
cbdb504
bd39a3b
4b7b306
1072642
4fa0832
ab3417e
5b5f871
553c9be
cdb0e6e
92fe00e
5033512
c5334ff
3668f80
2558457
078b8f4
208a15f
ad2af9f
90d5bef
f54c7f1
ccf2e8e
6ec298f
84d4240
ffbe922
289dbb3
ab2c03d
ccb183b
ac519e7
5cdbbf1
97a4610
95ec64b
a2037dd
1d1a2d7
e5ba5b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,13 +14,17 @@ | |
package executor | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
"encoding/binary" | ||
"math" | ||
|
||
"github.com/cznic/mathutil" | ||
"github.com/pingcap/errors" | ||
"github.com/pingcap/parser/model" | ||
"github.com/pingcap/tidb/kv" | ||
"github.com/pingcap/tidb/table" | ||
"github.com/pingcap/tidb/table/tables" | ||
"github.com/pingcap/tidb/tablecodec" | ||
"github.com/pingcap/tidb/types" | ||
"github.com/pingcap/tidb/util/chunk" | ||
"github.com/pingcap/tidb/util/logutil" | ||
|
@@ -31,8 +35,11 @@ import ( | |
type SplitIndexRegionExec struct { | ||
baseExecutor | ||
|
||
table table.Table | ||
tableInfo *model.TableInfo | ||
indexInfo *model.IndexInfo | ||
lower []types.Datum | ||
upper []types.Datum | ||
num int | ||
valueLists [][]types.Datum | ||
} | ||
|
||
|
@@ -48,18 +55,16 @@ func (e *SplitIndexRegionExec) Next(ctx context.Context, _ *chunk.RecordBatch) e | |
if !ok { | ||
return nil | ||
} | ||
regionIDs := make([]uint64, 0, len(e.valueLists)) | ||
index := tables.NewIndex(e.table.Meta().ID, e.table.Meta(), e.indexInfo) | ||
for _, values := range e.valueLists { | ||
idxKey, _, err := index.GenIndexKey(e.ctx.GetSessionVars().StmtCtx, values, math.MinInt64, nil) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
splitIdxKeys, err := e.getSplitIdxKeys() | ||
if err != nil { | ||
return err | ||
} | ||
regionIDs := make([]uint64, 0, len(splitIdxKeys)) | ||
for _, idxKey := range splitIdxKeys { | ||
regionID, err := s.SplitRegionAndScatter(idxKey) | ||
if err != nil { | ||
logutil.Logger(context.Background()).Warn("split table index region failed", | ||
zap.String("table", e.table.Meta().Name.L), | ||
zap.String("table", e.tableInfo.Name.L), | ||
zap.String("index", e.indexInfo.Name.L), | ||
zap.Error(err)) | ||
continue | ||
|
@@ -75,10 +80,127 @@ func (e *SplitIndexRegionExec) Next(ctx context.Context, _ *chunk.RecordBatch) e | |
if err != nil { | ||
logutil.Logger(context.Background()).Warn("wait scatter region failed", | ||
zap.Uint64("regionID", regionID), | ||
zap.String("table", e.table.Meta().Name.L), | ||
zap.String("table", e.tableInfo.Name.L), | ||
zap.String("index", e.indexInfo.Name.L), | ||
zap.Error(err)) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func (e *SplitIndexRegionExec) getSplitIdxKeys() ([][]byte, error) { | ||
var idxKeys [][]byte | ||
if e.num > 0 { | ||
idxKeys = make([][]byte, 0, e.num) | ||
} else { | ||
idxKeys = make([][]byte, 0, len(e.valueLists)+1) | ||
} | ||
// Split in the start of the index key. | ||
startIdxKey := tablecodec.EncodeTableIndexPrefix(e.tableInfo.ID, e.indexInfo.ID) | ||
idxKeys = append(idxKeys, startIdxKey) | ||
|
||
index := tables.NewIndex(e.tableInfo.ID, e.tableInfo, e.indexInfo) | ||
// Split index regions by user specified value lists. | ||
if len(e.valueLists) > 0 { | ||
for _, v := range e.valueLists { | ||
idxKey, _, err := index.GenIndexKey(e.ctx.GetSessionVars().StmtCtx, v, math.MinInt64, nil) | ||
if err != nil { | ||
return nil, err | ||
} | ||
idxKeys = append(idxKeys, idxKey) | ||
} | ||
return idxKeys, nil | ||
} | ||
// Split index regions by lower, upper value and calculate the step by (upper - lower)/num. | ||
lowerIdxKey, _, err := index.GenIndexKey(e.ctx.GetSessionVars().StmtCtx, e.lower, math.MinInt64, nil) | ||
if err != nil { | ||
return nil, err | ||
} | ||
// Use math.MinInt64 as handle_id for the upper index key to avoid affecting calculate split point. | ||
// If use math.MaxInt64 here, test of `TestSplitIndex` will report error. | ||
upperIdxKey, _, err := index.GenIndexKey(e.ctx.GetSessionVars().StmtCtx, e.upper, math.MinInt64, nil) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why don't we use [inline code lost in extraction] here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sounds reasonable, but I just want to keep it consistent with [inline code lost in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have already added a comment here. |
||
if err != nil { | ||
return nil, err | ||
} | ||
if bytes.Compare(lowerIdxKey, upperIdxKey) >= 0 { | ||
lowerStr, err1 := datumSliceToString(e.lower) | ||
upperStr, err2 := datumSliceToString(e.upper) | ||
if err1 != nil || err2 != nil { | ||
return nil, errors.Errorf("Split index region `%v` lower value %v should less than the upper value %v", e.indexInfo.Name, e.lower, e.upper) | ||
} | ||
return nil, errors.Errorf("Split index region `%v` lower value %v should less than the upper value %v", e.indexInfo.Name, lowerStr, upperStr) | ||
} | ||
return getValuesList(lowerIdxKey, upperIdxKey, e.num, idxKeys), nil | ||
} | ||
|
||
// getValuesList is used to get `num` values between lower and upper value. | ||
// To Simplify the explain, suppose lower and upper value type is int64, and lower=0, upper=100, num=10, | ||
// then calculate the step=(upper-lower)/num=10, then the function should return 0+10, 10+10, 20+10... all together 9 (num-1) values. | ||
// Then the function will return [10,20,30,40,50,60,70,80,90]. | ||
// The difference is the value type of upper,lower is []byte, So I use getUint64FromBytes to convert []byte to uint64. | ||
func getValuesList(lower, upper []byte, num int, valuesList [][]byte) [][]byte { | ||
commonPrefixIdx := longestCommonPrefixLen(lower, upper) | ||
step := getStepValue(lower[commonPrefixIdx:], upper[commonPrefixIdx:], num) | ||
startV := getUint64FromBytes(lower[commonPrefixIdx:], 0) | ||
// To get `num` regions, only need to split `num-1` idx keys. | ||
buf := make([]byte, 8) | ||
for i := 0; i < num-1; i++ { | ||
value := make([]byte, 0, commonPrefixIdx+8) | ||
value = append(value, lower[:commonPrefixIdx]...) | ||
startV += step | ||
binary.BigEndian.PutUint64(buf, startV) | ||
value = append(value, buf...) | ||
valuesList = append(valuesList, value) | ||
} | ||
return valuesList | ||
} | ||
|
||
// longestCommonPrefixLen gets the longest common prefix byte length. | ||
func longestCommonPrefixLen(s1, s2 []byte) int { | ||
l := mathutil.Min(len(s1), len(s2)) | ||
i := 0 | ||
for ; i < l; i++ { | ||
if s1[i] != s2[i] { | ||
break | ||
} | ||
} | ||
return i | ||
} | ||
|
||
// getStepValue gets the step of between the lower and upper value. step = (upper-lower)/num. | ||
// Convert byte slice to uint64 first. | ||
func getStepValue(lower, upper []byte, num int) uint64 { | ||
lowerUint := getUint64FromBytes(lower, 0) | ||
upperUint := getUint64FromBytes(upper, 0xff) | ||
return (upperUint - lowerUint) / uint64(num) | ||
} | ||
|
||
// getUint64FromBytes decodes the first 8 bytes of bs as a big-endian uint64.
// When bs holds fewer than 8 bytes, the missing trailing bytes are filled with pad;
// bs itself is never modified.
func getUint64FromBytes(bs []byte, pad byte) uint64 {
	if len(bs) >= 8 {
		return binary.BigEndian.Uint64(bs)
	}
	var padded [8]byte
	filled := copy(padded[:], bs)
	for pos := filled; pos < len(padded); pos++ {
		padded[pos] = pad
	}
	return binary.BigEndian.Uint64(padded[:])
}
|
||
func datumSliceToString(ds []types.Datum) (string, error) { | ||
str := "(" | ||
for i, d := range ds { | ||
s, err := d.ToString() | ||
if err != nil { | ||
return str, err | ||
} | ||
if i > 0 { | ||
str += "," | ||
} | ||
str += s | ||
} | ||
str += ")" | ||
return str, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we refine the error message?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great catch! Thanks.
I think the `Unknown column` error is reasonable. I will try to refine the `Data Truncated` error. @bb7133