From f76c6acf75152c98acc3c8334326dd69d0b2d6fa Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 6 Mar 2024 18:39:35 +0800 Subject: [PATCH] ttl: fix TTL cannot split tasks with right ranges for common handle int (#51532) (#51545) close pingcap/tidb#51527 --- pkg/ttl/cache/BUILD.bazel | 3 +- pkg/ttl/cache/split_test.go | 310 +++++++++++++++++++++++++++++++++++- pkg/ttl/cache/table.go | 116 ++++++++++++-- 3 files changed, 415 insertions(+), 14 deletions(-) diff --git a/pkg/ttl/cache/BUILD.bazel b/pkg/ttl/cache/BUILD.bazel index 839c53f1822b2..07057bd3b2d48 100644 --- a/pkg/ttl/cache/BUILD.bazel +++ b/pkg/ttl/cache/BUILD.bazel @@ -25,6 +25,7 @@ go_library( "//pkg/types", "//pkg/util/chunk", "//pkg/util/codec", + "//pkg/util/collate", "//pkg/util/logutil", "//pkg/util/mathutil", "@com_github_pingcap_errors//:errors", @@ -47,7 +48,7 @@ go_test( ], embed = [":cache"], flaky = True, - shard_count = 13, + shard_count = 16, deps = [ "//pkg/infoschema", "//pkg/kv", diff --git a/pkg/ttl/cache/split_test.go b/pkg/ttl/cache/split_test.go index 6f7e944e7a4a4..ef29af5560588 100644 --- a/pkg/ttl/cache/split_test.go +++ b/pkg/ttl/cache/split_test.go @@ -205,7 +205,11 @@ func (s *mockTiKVStore) GetRegionCache() *tikv.RegionCache { } func bytesHandle(t *testing.T, data []byte) kv.Handle { - encoded, err := codec.EncodeKey(nil, nil, types.NewBytesDatum(data)) + return commonHandle(t, types.NewBytesDatum(data)) +} + +func commonHandle(t *testing.T, d ...types.Datum) kv.Handle { + encoded, err := codec.EncodeKey(nil, nil, d...) require.NoError(t, err) h, err := kv.NewCommonHandle(encoded) require.NoError(t, err) @@ -270,7 +274,6 @@ func TestSplitTTLScanRangesWithSignedInt(t *testing.T) { createTTLTable(t, tk, "t4", "int"), createTTLTable(t, tk, "t5", "bigint"), createTTLTable(t, tk, "t6", ""), // no clustered - create2PKTTLTable(t, tk, "t7", "tinyint"), } tikvStore := newMockTiKVStore(t) @@ -332,7 +335,6 @@ func TestSplitTTLScanRangesWithUnsignedInt(t *testing.T) { createTTLTable(t, tk, "t3", "mediumint unsigned"), createTTLTable(t, tk, "t4", "int unsigned"), createTTLTable(t, tk, "t5", "bigint unsigned"), - create2PKTTLTable(t, tk, "t6", "tinyint unsigned"), } tikvStore := newMockTiKVStore(t) @@ -399,6 +401,106 @@ func TestSplitTTLScanRangesWithUnsignedInt(t *testing.T) { } } +func TestSplitTTLScanRangesCommonHandleSignedInt(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tbls := []*cache.PhysicalTable{ + create2PKTTLTable(t, tk, "t1", "bigint"), + create2PKTTLTable(t, tk, "t2", "int"), + } + + tikvStore := newMockTiKVStore(t) + for _, tbl := range tbls { + // test only one region + tikvStore.clearRegions() + ranges, err := tbl.SplitScanRanges(context.TODO(), tikvStore, 4) + require.NoError(t, err) + require.Equal(t, 1, len(ranges)) + checkRange(t, ranges[0], types.Datum{}, types.Datum{}) + + // test share regions with other table + tikvStore.clearRegions() + tikvStore.addRegion( + tablecodec.GenTablePrefix(tbl.ID-1), + tablecodec.GenTablePrefix(tbl.ID+1), + ) + ranges, err = tbl.SplitScanRanges(context.TODO(), tikvStore, 4) + require.NoError(t, err) + require.Equal(t, 1, len(ranges)) + checkRange(t, ranges[0], types.Datum{}, types.Datum{}) + + // test one table has multiple regions + tikvStore.clearRegions() + tikvStore.addRegionBeginWithTablePrefix(tbl.ID, commonHandle(t, types.NewIntDatum(-21))) + tikvStore.addRegionWithTablePrefix(tbl.ID, + commonHandle(t, types.NewIntDatum(-21)), + commonHandle(t, types.NewIntDatum(-19), types.NewIntDatum(0)), + ) + tikvStore.addRegionWithTablePrefix(tbl.ID, + commonHandle(t, types.NewIntDatum(-19), types.NewIntDatum(0)), + commonHandle(t, types.NewIntDatum(2)), + ) + tikvStore.addRegionEndWithTablePrefix(commonHandle(t, types.NewIntDatum(2)), tbl.ID) + ranges, err = tbl.SplitScanRanges(context.TODO(), tikvStore, 4) + require.NoError(t, err) + require.Equal(t, 4, len(ranges)) + checkRange(t, ranges[0], types.Datum{}, types.NewIntDatum(-21)) + checkRange(t, ranges[1], types.NewIntDatum(-21), types.NewIntDatum(-18)) + checkRange(t, ranges[2], types.NewIntDatum(-18), types.NewIntDatum(2)) + checkRange(t, ranges[3], types.NewIntDatum(2), types.Datum{}) + } +} + +func TestSplitTTLScanRangesCommonHandleUnsignedInt(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tbls := []*cache.PhysicalTable{ + create2PKTTLTable(t, tk, "t1", "bigint unsigned"), + create2PKTTLTable(t, tk, "t2", "int unsigned"), + } + + tikvStore := newMockTiKVStore(t) + for _, tbl := range tbls { + // test only one region + tikvStore.clearRegions() + ranges, err := tbl.SplitScanRanges(context.TODO(), tikvStore, 4) + require.NoError(t, err) + require.Equal(t, 1, len(ranges)) + checkRange(t, ranges[0], types.Datum{}, types.Datum{}) + + // test share regions with other table + tikvStore.clearRegions() + tikvStore.addRegion( + tablecodec.GenTablePrefix(tbl.ID-1), + tablecodec.GenTablePrefix(tbl.ID+1), + ) + ranges, err = tbl.SplitScanRanges(context.TODO(), tikvStore, 4) + require.NoError(t, err) + require.Equal(t, 1, len(ranges)) + checkRange(t, ranges[0], types.Datum{}, types.Datum{}) + + // test one table has multiple regions + tikvStore.clearRegions() + tikvStore.addRegionBeginWithTablePrefix(tbl.ID, commonHandle(t, types.NewUintDatum(9))) + tikvStore.addRegionWithTablePrefix(tbl.ID, + commonHandle(t, types.NewUintDatum(9)), + commonHandle(t, types.NewUintDatum(23), types.NewUintDatum(0)), + ) + tikvStore.addRegionWithTablePrefix(tbl.ID, + commonHandle(t, types.NewUintDatum(23), types.NewUintDatum(0)), + commonHandle(t, types.NewUintDatum(math.MaxInt64+9)), + ) + tikvStore.addRegionEndWithTablePrefix(commonHandle(t, types.NewUintDatum(math.MaxInt64+9)), tbl.ID) + ranges, err = tbl.SplitScanRanges(context.TODO(), tikvStore, 4) + require.NoError(t, err) + require.Equal(t, 4, len(ranges)) + checkRange(t, ranges[0], types.Datum{}, types.NewUintDatum(9)) + checkRange(t, ranges[1], types.NewUintDatum(9), types.NewUintDatum(24)) + checkRange(t, ranges[2], types.NewUintDatum(24), types.NewUintDatum(math.MaxInt64+9)) + checkRange(t, ranges[3], types.NewUintDatum(math.MaxInt64+9), types.Datum{}) + } +} + func TestSplitTTLScanRangesWithBytes(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) @@ -719,6 +821,7 @@ func TestGetNextBytesHandleDatum(t *testing.T) { } } } + func TestGetNextIntHandle(t *testing.T) { tblID := int64(7) cases := []struct { @@ -824,3 +927,204 @@ func TestGetNextIntHandle(t *testing.T) { } } } + +func TestGetNextIntDatumFromCommonHandle(t *testing.T) { + encode := func(tblID int64, d ...types.Datum) kv.Key { + encoded, err := codec.EncodeKey(nil, nil, d...) + require.NoError(t, err) + h, err := kv.NewCommonHandle(encoded) + require.NoError(t, err) + return tablecodec.EncodeRowKey(tblID, h.Encoded()) + } + + var nullDatum types.Datum + nullDatum.SetNull() + tblID := int64(7) + fixedLen := len(encode(tblID, types.NewIntDatum(0))) + + cases := []struct { + key kv.Key + d types.Datum + unsigned bool + }{ + { + key: encode(tblID, types.NewIntDatum(0)), + d: types.NewIntDatum(0), + }, + { + key: encode(tblID, types.NewIntDatum(1)), + d: types.NewIntDatum(1), + }, + { + key: encode(tblID, types.NewIntDatum(1024)), + d: types.NewIntDatum(1024), + }, + { + key: encode(tblID, types.NewIntDatum(math.MaxInt64)), + d: types.NewIntDatum(math.MaxInt64), + }, + { + key: encode(tblID, types.NewIntDatum(math.MaxInt64/2)), + d: types.NewIntDatum(math.MaxInt64 / 2), + }, + { + key: encode(tblID, types.NewIntDatum(-1)), + d: types.NewIntDatum(-1), + }, + { + key: encode(tblID, types.NewIntDatum(-1024)), + d: types.NewIntDatum(-1024), + }, + { + key: encode(tblID, types.NewIntDatum(math.MinInt64)), + d: types.NewIntDatum(math.MinInt64), + }, + { + key: encode(tblID, types.NewIntDatum(math.MinInt64/2)), + d: types.NewIntDatum(math.MinInt64 / 2), + }, + { + key: encode(tblID, types.NewIntDatum(math.MaxInt64))[:fixedLen-1], + d: types.NewIntDatum(math.MaxInt64 - 0xFF), + }, + { + key: encode(tblID, types.NewIntDatum(math.MaxInt64), types.NewIntDatum(0)), + d: nullDatum, + }, + { + key: encode(tblID, types.NewIntDatum(math.MaxInt64-1), types.NewIntDatum(0)), + d: types.NewIntDatum(math.MaxInt64), + }, + { + key: encode(tblID, types.NewIntDatum(123), types.NewIntDatum(0)), + d: types.NewIntDatum(124), + }, + { + key: encode(tblID, types.NewIntDatum(-123), types.NewIntDatum(0)), + d: types.NewIntDatum(-122), + }, + { + key: encode(tblID, types.NewIntDatum(math.MinInt64), types.NewIntDatum(0)), + d: types.NewIntDatum(math.MinInt64 + 1), + }, + { + key: encode(tblID, types.NewUintDatum(0)), + d: types.NewUintDatum(0), + unsigned: true, + }, + { + key: encode(tblID, types.NewUintDatum(1)), + d: types.NewUintDatum(1), + unsigned: true, + }, + { + key: encode(tblID, types.NewUintDatum(1024)), + d: types.NewUintDatum(1024), + unsigned: true, + }, + { + key: encode(tblID, types.NewUintDatum(math.MaxInt64)), + d: types.NewUintDatum(math.MaxInt64), + unsigned: true, + }, + { + key: encode(tblID, types.NewUintDatum(math.MaxInt64+1)), + d: types.NewUintDatum(math.MaxInt64 + 1), + unsigned: true, + }, + { + key: encode(tblID, types.NewUintDatum(math.MaxUint64)), + d: types.NewUintDatum(math.MaxUint64), + unsigned: true, + }, + { + key: encode(tblID, types.NewUintDatum(math.MaxUint64))[:fixedLen-1], + d: types.NewUintDatum(math.MaxUint64 - 0xFF), + unsigned: true, + }, + + { + key: encode(tblID, types.NewUintDatum(math.MaxUint64), types.NewIntDatum(0)), + d: nullDatum, + }, + { + key: encode(tblID, types.NewUintDatum(math.MaxUint64-1), types.NewIntDatum(0)), + d: types.NewUintDatum(math.MaxUint64), + unsigned: true, + }, + { + key: encode(tblID, types.NewUintDatum(123), types.NewIntDatum(0)), + d: types.NewUintDatum(124), + unsigned: true, + }, + { + key: encode(tblID, types.NewUintDatum(0), types.NewIntDatum(0)), + d: types.NewUintDatum(1), + unsigned: true, + }, + { + key: []byte{}, + d: types.NewIntDatum(math.MinInt64), + }, + { + key: []byte{}, + d: types.NewUintDatum(0), + unsigned: true, + }, + { + key: tablecodec.GenTableRecordPrefix(tblID), + d: types.NewIntDatum(math.MinInt64), + }, + { + key: tablecodec.GenTableRecordPrefix(tblID), + d: types.NewUintDatum(0), + unsigned: true, + }, + { + // 3 is encoded intFlag + key: append(tablecodec.GenTableRecordPrefix(tblID), []byte{3}...), + d: types.NewIntDatum(math.MinInt64), + }, + { + // 3 is encoded intFlag + key: append(tablecodec.GenTableRecordPrefix(tblID), []byte{3}...), + d: types.NewUintDatum(0), + unsigned: true, + }, + { + // 4 is encoded uintFlag + key: append(tablecodec.GenTableRecordPrefix(tblID), []byte{4}...), + d: nullDatum, + }, + { + // 4 is encoded uintFlag + key: append(tablecodec.GenTableRecordPrefix(tblID), []byte{4}...), + d: types.NewUintDatum(0), + unsigned: true, + }, + { + // 5 + key: append(tablecodec.GenTableRecordPrefix(tblID), []byte{5}...), + d: nullDatum, + }, + { + // 5 + key: append(tablecodec.GenTableRecordPrefix(tblID), []byte{5}...), + d: nullDatum, + unsigned: true, + }, + } + + for _, c := range cases { + if !c.d.IsNull() { + if c.unsigned { + require.Equal(t, types.KindUint64, c.d.Kind()) + } else { + require.Equal(t, types.KindInt64, c.d.Kind()) + } + } + + d := cache.GetNextIntDatumFromCommonHandle(c.key, tablecodec.GenTableRecordPrefix(tblID), c.unsigned) + require.Equal(t, c.d, d) + } +} diff --git a/pkg/ttl/cache/table.go b/pkg/ttl/cache/table.go index 1e69faef9d34b..897de06d1be63 100644 --- a/pkg/ttl/cache/table.go +++ b/pkg/ttl/cache/table.go @@ -17,6 +17,7 @@ package cache import ( "context" "encoding/binary" + "encoding/hex" "fmt" "math" "time" @@ -33,6 +34,7 @@ import ( "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/codec" + "github.com/pingcap/tidb/pkg/util/collate" "github.com/pingcap/tidb/pkg/util/mathutil" "github.com/tikv/client-go/v2/tikv" ) @@ -221,12 +223,15 @@ func (t *PhysicalTable) SplitScanRanges(ctx context.Context, store kv.Storage, s ft := t.KeyColumns[0].FieldType switch ft.GetType() { case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24: + if len(t.KeyColumns) > 1 { + return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, true, mysql.HasUnsignedFlag(ft.GetFlag())) + } return t.splitIntRanges(ctx, tikvStore, splitCnt) case mysql.TypeBit: - return t.splitBinaryRanges(ctx, tikvStore, splitCnt) + return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, false, false) case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar: if mysql.HasBinaryFlag(ft.GetFlag()) { - return t.splitBinaryRanges(ctx, tikvStore, splitCnt) + return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, false, false) } } return []ScanRange{newFullRange()}, nil @@ -296,7 +301,9 @@ func (t *PhysicalTable) splitIntRanges(ctx context.Context, store tikv.Storage, return scanRanges, nil } -func (t *PhysicalTable) splitBinaryRanges(ctx context.Context, store tikv.Storage, splitCnt int) ([]ScanRange, error) { +func (t *PhysicalTable) splitCommonHandleRanges( + ctx context.Context, store tikv.Storage, splitCnt int, isInt bool, unsigned bool, +) ([]ScanRange, error) { recordPrefix := tablecodec.GenTableRecordPrefix(t.ID) startKey, endKey := recordPrefix, recordPrefix.PrefixNext() keyRanges, err := t.splitRawKeyRanges(ctx, store, startKey, endKey, splitCnt) @@ -317,11 +324,22 @@ func (t *PhysicalTable) splitBinaryRanges(ctx context.Context, store tikv.Storag curScanEnd := nullDatum() if i != len(keyRanges)-1 { - curScanEnd = GetNextBytesHandleDatum(keyRange.EndKey, recordPrefix) + if isInt { + curScanEnd = GetNextIntDatumFromCommonHandle(keyRange.EndKey, recordPrefix, unsigned) + } else { + curScanEnd = GetNextBytesHandleDatum(keyRange.EndKey, recordPrefix) + } } - if !curScanStart.IsNull() && !curScanEnd.IsNull() && kv.Key(curScanStart.GetBytes()).Cmp(curScanEnd.GetBytes()) >= 0 { - continue + if !curScanStart.IsNull() && !curScanEnd.IsNull() { + cmp, err := curScanStart.Compare(nil, &curScanEnd, collate.GetBinaryCollator()) + if err != nil { + return nil, err + } + + if cmp >= 0 { + continue + } } scanRanges = append(scanRanges, newDatumRange(curScanStart, curScanEnd)) @@ -377,12 +395,24 @@ func (t *PhysicalTable) splitRawKeyRanges(ctx context.Context, store tikv.Storag return ranges, nil } -var emptyBytesHandleKey kv.Key +var commonHandleBytesByte byte + +var commonHandleIntByte byte + +var commonHandleUintByte byte func init() { key, err := codec.EncodeKey(nil, nil, types.NewBytesDatum(nil)) terror.MustNil(err) - emptyBytesHandleKey = key + commonHandleBytesByte = key[0] + + key, err = codec.EncodeKey(nil, nil, types.NewIntDatum(0)) + terror.MustNil(err) + commonHandleIntByte = key[0] + + key, err = codec.EncodeKey(nil, nil, types.NewUintDatum(0)) + terror.MustNil(err) + commonHandleUintByte = key[0] } // GetNextIntHandle is used for int handle tables. @@ -422,6 +452,72 @@ func GetNextIntHandle(key kv.Key, recordPrefix []byte) kv.Handle { return kv.IntHandle(u + 1) } +// GetNextIntDatumFromCommonHandle is used for common handle tables with int value. +// It returns the min handle whose encoded key is or after argument `key` +// If it cannot find a valid value, a null datum will be returned. +func GetNextIntDatumFromCommonHandle(key kv.Key, recordPrefix []byte, unsigned bool) (d types.Datum) { + if key.Cmp(recordPrefix) > 0 && !key.HasPrefix(recordPrefix) { + d.SetNull() + return d + } + + typeByte := commonHandleIntByte + if unsigned { + typeByte = commonHandleUintByte + } + + var minDatum types.Datum + if unsigned { + minDatum.SetUint64(0) + } else { + minDatum.SetInt64(math.MinInt64) + } + + if key.Cmp(recordPrefix) <= 0 { + d = minDatum + return d + } + + encodedVal := key[len(recordPrefix):] + if encodedVal[0] < typeByte { + d = minDatum + return d + } + + if encodedVal[0] > typeByte { + d.SetNull() + return d + } + + if len(encodedVal) < 9 { + newVal := make([]byte, 9) + copy(newVal, encodedVal) + encodedVal = newVal + } + + _, v, err := codec.DecodeOne(encodedVal) + if err != nil { + // should never happen + terror.Log(errors.Annotatef(err, "TTL decode common handle failed, key: %s", hex.EncodeToString(key))) + return nullDatum() + } + + if len(encodedVal) > 9 { + if (unsigned && v.GetUint64() == math.MaxUint64) || (!unsigned && v.GetInt64() == math.MaxInt64) { + d.SetNull() + return d + } + + if unsigned { + v.SetUint64(v.GetUint64() + 1) + } else { + v.SetInt64(v.GetInt64() + 1) + } + } + + return v +} + // GetNextBytesHandleDatum is used for a table with one binary or string column common handle. // It returns the minValue whose encoded key is or after argument `key` // If it cannot find a valid value, a null datum will be returned. @@ -437,12 +533,12 @@ func GetNextBytesHandleDatum(key kv.Key, recordPrefix []byte) (d types.Datum) { } encodedVal := key[len(recordPrefix):] - if encodedVal[0] < emptyBytesHandleKey[0] { + if encodedVal[0] < commonHandleBytesByte { d.SetBytes([]byte{}) return d } - if encodedVal[0] > emptyBytesHandleKey[0] { + if encodedVal[0] > commonHandleBytesByte { d.SetNull() return d }