From 273e3a81bcfe694c8cf9edf13c9936403c049d9b Mon Sep 17 00:00:00 2001 From: Maksim Bezsaznyj Date: Tue, 10 Nov 2020 19:50:41 -0500 Subject: [PATCH 1/3] POC implementation of Vitess hashing algorithm. --- expression/builtin.go | 1 + expression/builtin_miscellaneous.go | 78 +++++++++++++++++++++++++++++ expression/integration_test.go | 13 +++++ util/vitess/vitess_hash.go | 60 ++++++++++++++++++++++ util/vitess/vitess_hash_test.go | 75 +++++++++++++++++++++++++++ 5 files changed, 227 insertions(+) create mode 100644 util/vitess/vitess_hash.go create mode 100644 util/vitess/vitess_hash_test.go diff --git a/expression/builtin.go b/expression/builtin.go index 8df6a37587b20..388880d62d575 100644 --- a/expression/builtin.go +++ b/expression/builtin.go @@ -769,6 +769,7 @@ var funcs = map[string]functionClass{ ast.ReleaseAllLocks: &releaseAllLocksFunctionClass{baseFunctionClass{ast.ReleaseAllLocks, 0, 0}}, ast.UUID: &uuidFunctionClass{baseFunctionClass{ast.UUID, 0, 0}}, ast.UUIDShort: &uuidShortFunctionClass{baseFunctionClass{ast.UUIDShort, 0, 0}}, + ast.VitessHash: &vitessHashFunctionClass{baseFunctionClass{ast.VitessHash, 1, 1}}, // get_lock() and release_lock() are parsed but do nothing. // It is used for preventing error in Ruby's activerecord migrations. diff --git a/expression/builtin_miscellaneous.go b/expression/builtin_miscellaneous.go index cd9a7a2ca3bf0..f385d18ff747b 100644 --- a/expression/builtin_miscellaneous.go +++ b/expression/builtin_miscellaneous.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/vitess" "github.com/pingcap/tipb/go-tipb" ) @@ -51,6 +52,7 @@ var ( _ functionClass = &releaseAllLocksFunctionClass{} _ functionClass = &uuidFunctionClass{} _ functionClass = &uuidShortFunctionClass{} + _ functionClass = &vitessHashFunctionClass{} ) var ( @@ -73,6 +75,7 @@ var ( _ builtinFunc = &builtinIsIPv4MappedSig{} _ builtinFunc = &builtinIsIPv6Sig{} _ builtinFunc = &builtinUUIDSig{} + _ builtinFunc = &builtinVitessHashSig{} _ builtinFunc = &builtinNameConstIntSig{} _ builtinFunc = &builtinNameConstRealSig{} @@ -1045,3 +1048,78 @@ type uuidShortFunctionClass struct { func (c *uuidShortFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) { return nil, errFunctionNotExists.GenWithStackByArgs("FUNCTION", "UUID_SHORT") } + +type vitessHashFunctionClass struct { + baseFunctionClass +} + +func (c *vitessHashFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) { + if err := c.verifyArgs(args); err != nil { + return nil, err + } + argTp := args[0].GetType().EvalType() + bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETString, argTp) + if err != nil { + return nil, err + } + + bf.tp.Flen = 8 + types.SetBinChsClnFlag(bf.tp) + + sig := &builtinVitessHashSig{bf} + sig.setPbCode(tipb.ScalarFuncSig_VitessHash) + return sig, nil +} + +type builtinVitessHashSig struct { + baseBuiltinFunc +} + +func (b *builtinVitessHashSig) Clone() builtinFunc { + newSig := &builtinVitessHashSig{} + newSig.cloneFrom(&b.baseBuiltinFunc) + return newSig +} + +// evalString evals VITESS_HASH(int64|string|decimal). +func (b *builtinVitessHashSig) evalString(row chunk.Row) (string, bool, error) { + argTp := b.args[0].GetType().EvalType() + switch argTp { + case types.ETDecimal: + shardKeyDec, isNull, err := b.args[0].EvalDecimal(b.ctx, row) + if isNull || err != nil { + return "", true, err + } + shardKeyUint, err := shardKeyDec.ToUint() + if err != nil { + return "", true, err + } + hashed, err := vitess.VitessHashUint64(shardKeyUint) + if err != nil { + return "", true, err + } + return string(hashed), false, nil + case types.ETString: + shardKeyStr, isNull, err := b.args[0].EvalString(b.ctx, row) + if isNull || err != nil { + return "", true, err + } + hashed, err := vitess.VitessHash([]byte(shardKeyStr)) + if err != nil { + return "", true, err + } + return string(hashed), false, nil + case types.ETInt: + shardKeyInt, isNull, err := b.args[0].EvalInt(b.ctx, row) + if isNull || err != nil { + return "", true, err + } + hashed, err := vitess.VitessHashUint64(uint64(shardKeyInt)) + if err != nil { + return "", true, err + } + return string(hashed), false, nil + default: + return "", true, errIncorrectArgs.GenWithStackByArgs("VITESS_HASH") + } +} diff --git a/expression/integration_test.go b/expression/integration_test.go index 6b3294e22306a..e8316d6629066 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -7656,3 +7656,16 @@ func (s *testIntegrationSerialSuite) TestIssue20608(c *C) { tk := testkit.NewTestKit(c, s.store) tk.MustQuery("select '䇇Հ' collate utf8mb4_bin like '___Հ';").Check(testkit.Rows("0")) } + +func (s *testIntegrationSuite) TestVitessHash(c *C) { + defer s.cleanEnv(c) + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustQuery("select vitess_hash(30375298039) from t").Check(testkit.Rows("\x03\x12\x65\x66\x1E\x5F\x11\x33")) + // Same as previous but passed as a binary string + tk.MustQuery("select vitess_hash(x'00000007128243F7') from t").Check(testkit.Rows("\x03\x12\x65\x66\x1E\x5F\x11\x33")) + tk.MustQuery("select vitess_hash(1123) from t").Check(testkit.Rows("\x03\x1B\x56\x5D\x41\xBD\xF8\xCA")) + tk.MustQuery("select vitess_hash(30573721600) from t").Check(testkit.Rows("\x1E\xFD\x64\x39\xF2\x05\x0F\xFD")) + tk.MustQuery("select vitess_hash(convert(116, decimal(8,4))) from t").Check(testkit.Rows("\x1E\x17\x88\xFF\x0F\xDE\x09\x3C")) + tk.MustQuery(fmt.Sprintf("select vitess_hash(%d) from t", uint64(math.MaxUint64))).Check(testkit.Rows("\x35\x55\x50\xB2\x15\x0E\x24\x51")) +} diff --git a/util/vitess/vitess_hash.go b/util/vitess/vitess_hash.go new file mode 100644 index 0000000000000..b57d039260ffa --- /dev/null +++ b/util/vitess/vitess_hash.go @@ -0,0 +1,60 @@ +// Copyright 2017 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package vitess + +import ( + "crypto/cipher" + "crypto/des" + "encoding/binary" + "encoding/hex" + "fmt" + + "github.com/pingcap/errors" +) + +var nullKeyBlock cipher.Block + +func init() { + block, err := des.NewCipher(make([]byte, 8)) + if err != nil { + panic(errors.Trace(err)) + } + nullKeyBlock = block +} + +// VitessHash implements vitess' method of calculating a hash used for determining a shard key range. +// Uses a DES encryption with 64 bit key, 64 bit block, null-key +func VitessHash(shardKey []byte) ([]byte, error) { + if len(shardKey) > 8 { + return nil, fmt.Errorf("shard key is too long: %v", hex.EncodeToString(shardKey)) + } else if len(shardKey) == 8 { + var hashed [8]byte + nullKeyBlock.Encrypt(hashed[:], shardKey[:]) + return hashed[:], nil + } else { + var keybytes, hashed [8]byte + numericValue := binary.BigEndian.Uint64(shardKey) + binary.BigEndian.PutUint64(keybytes[:], numericValue) + nullKeyBlock.Encrypt(hashed[:], keybytes[:]) + return hashed[:], nil + } +} + +// VitessHashUint64 implements vitess' method of calculating a hash used for determining a shard key range. +// Uses a DES encryption with 64 bit key, 64 bit block, null-key +func VitessHashUint64(shardKey uint64) ([]byte, error) { + var keybytes [8]byte + binary.BigEndian.PutUint64(keybytes[:], shardKey) + return VitessHash(keybytes[:]) +} diff --git a/util/vitess/vitess_hash_test.go b/util/vitess/vitess_hash_test.go new file mode 100644 index 0000000000000..2640442dc57da --- /dev/null +++ b/util/vitess/vitess_hash_test.go @@ -0,0 +1,75 @@ +// Copyright 2017 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package vitess + +import ( + "encoding/hex" + . "github.com/pingcap/check" + "github.com/pingcap/tidb/util/testleak" + "math" + "strings" + "testing" +) + +var _ = Suite(&testVitessSuite{}) + +func TestT(t *testing.T) { + TestingT(t) +} + +type testVitessSuite struct { +} + +func toHex(buf []byte) string { + return strings.ToUpper(hex.EncodeToString(buf)) +} + +func fromHex(hexStr string) []byte { + if buf, err := hex.DecodeString(hexStr); err == nil { + return buf + } else { + panic(err) + } +} + +var _ = Suite(&testVitessSuite{}) + +func (s *testVitessSuite) TestVitessHash(c *C) { + defer testleak.AfterTest(c)() + + hashed, err := VitessHashUint64(30375298039) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "031265661E5F1133") + + // Same as previous value but passed as a []byte instead + hashed, err = VitessHash(fromHex("00000007128243F7")) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "031265661E5F1133") + + hashed, err = VitessHashUint64(1123) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "031B565D41BDF8CA") + + hashed, err = VitessHashUint64(30573721600) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "1EFD6439F2050FFD") + + hashed, err = VitessHashUint64(116) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "1E1788FF0FDE093C") + + hashed, err = VitessHashUint64(math.MaxUint64) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "355550B2150E2451") +} From 557067a64cd4ab82ed76e4a21194fa908657cd8d Mon Sep 17 00:00:00 2001 From: Maksim Bezsaznyj Date: Thu, 19 Nov 2020 11:35:44 -0500 Subject: [PATCH 2/3] Fix incorrect handling of keys < 64bit. --- expression/integration_test.go | 2 ++ util/vitess/vitess_hash.go | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/expression/integration_test.go b/expression/integration_test.go index e8316d6629066..3b575c1f3478b 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -7664,6 +7664,8 @@ func (s *testIntegrationSuite) TestVitessHash(c *C) { tk.MustQuery("select vitess_hash(30375298039) from t").Check(testkit.Rows("\x03\x12\x65\x66\x1E\x5F\x11\x33")) // Same as previous but passed as a binary string tk.MustQuery("select vitess_hash(x'00000007128243F7') from t").Check(testkit.Rows("\x03\x12\x65\x66\x1E\x5F\x11\x33")) + // Less bits, should be prefixed with zeroes + tk.MustQuery("select vitess_hash(x'07128243F7') from t").Check(testkit.Rows("\x03\x12\x65\x66\x1E\x5F\x11\x33")) tk.MustQuery("select vitess_hash(1123) from t").Check(testkit.Rows("\x03\x1B\x56\x5D\x41\xBD\xF8\xCA")) tk.MustQuery("select vitess_hash(30573721600) from t").Check(testkit.Rows("\x1E\xFD\x64\x39\xF2\x05\x0F\xFD")) tk.MustQuery("select vitess_hash(convert(116, decimal(8,4))) from t").Check(testkit.Rows("\x1E\x17\x88\xFF\x0F\xDE\x09\x3C")) diff --git a/util/vitess/vitess_hash.go b/util/vitess/vitess_hash.go index b57d039260ffa..5e6b5fa6dd6a2 100644 --- a/util/vitess/vitess_hash.go +++ b/util/vitess/vitess_hash.go @@ -44,8 +44,7 @@ func VitessHash(shardKey []byte) ([]byte, error) { return hashed[:], nil } else { var keybytes, hashed [8]byte - numericValue := binary.BigEndian.Uint64(shardKey) - binary.BigEndian.PutUint64(keybytes[:], numericValue) + copy(keybytes[len(keybytes)-len(shardKey):], shardKey) nullKeyBlock.Encrypt(hashed[:], keybytes[:]) return hashed[:], nil } From 9a54a5ecaddba9ebc61652dff2e9615e1c68ef1d Mon Sep 17 00:00:00 2001 From: Maksim Bezsaznyj Date: Tue, 1 Dec 2020 10:38:39 -0500 Subject: [PATCH 3/3] Address PR comments --- util/vitess/vitess_hash.go | 2 +- util/vitess/vitess_hash_test.go | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/util/vitess/vitess_hash.go b/util/vitess/vitess_hash.go index 5e6b5fa6dd6a2..ca166b2d1204c 100644 --- a/util/vitess/vitess_hash.go +++ b/util/vitess/vitess_hash.go @@ -1,4 +1,4 @@ -// Copyright 2017 PingCAP, Inc. +// Copyright 2020 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/util/vitess/vitess_hash_test.go b/util/vitess/vitess_hash_test.go index 2640442dc57da..579b668eb2406 100644 --- a/util/vitess/vitess_hash_test.go +++ b/util/vitess/vitess_hash_test.go @@ -1,4 +1,4 @@ -// Copyright 2017 PingCAP, Inc. +// Copyright 2020 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,11 +15,12 @@ package vitess import ( "encoding/hex" - . "github.com/pingcap/check" - "github.com/pingcap/tidb/util/testleak" "math" "strings" "testing" + + . "github.com/pingcap/check" + "github.com/pingcap/tidb/util/testleak" ) var _ = Suite(&testVitessSuite{})