Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expression: POC implementation of Vitess hashing algorithm. #20972

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions expression/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,7 @@ var funcs = map[string]functionClass{
ast.ReleaseAllLocks: &releaseAllLocksFunctionClass{baseFunctionClass{ast.ReleaseAllLocks, 0, 0}},
ast.UUID: &uuidFunctionClass{baseFunctionClass{ast.UUID, 0, 0}},
ast.UUIDShort: &uuidShortFunctionClass{baseFunctionClass{ast.UUIDShort, 0, 0}},
ast.VitessHash: &vitessHashFunctionClass{baseFunctionClass{ast.VitessHash, 1, 1}},

// get_lock() and release_lock() are parsed but do nothing.
// It is used for preventing error in Ruby's activerecord migrations.
Expand Down
78 changes: 78 additions & 0 deletions expression/builtin_miscellaneous.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/types/json"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/vitess"
"github.com/pingcap/tipb/go-tipb"
)

Expand All @@ -51,6 +52,7 @@ var (
_ functionClass = &releaseAllLocksFunctionClass{}
_ functionClass = &uuidFunctionClass{}
_ functionClass = &uuidShortFunctionClass{}
_ functionClass = &vitessHashFunctionClass{}
)

var (
Expand All @@ -73,6 +75,7 @@ var (
_ builtinFunc = &builtinIsIPv4MappedSig{}
_ builtinFunc = &builtinIsIPv6Sig{}
_ builtinFunc = &builtinUUIDSig{}
_ builtinFunc = &builtinVitessHashSig{}

_ builtinFunc = &builtinNameConstIntSig{}
_ builtinFunc = &builtinNameConstRealSig{}
Expand Down Expand Up @@ -1045,3 +1048,78 @@ type uuidShortFunctionClass struct {
// getFunction never builds a signature for UUID_SHORT: it always returns
// errFunctionNotExists for "FUNCTION" "UUID_SHORT", regardless of ctx and args.
func (c *uuidShortFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
	notExistsErr := errFunctionNotExists.GenWithStackByArgs("FUNCTION", "UUID_SHORT")
	return nil, notExistsErr
}

// vitessHashFunctionClass is the function class of the VITESS_HASH builtin.
// It embeds baseFunctionClass and adds no fields of its own.
type vitessHashFunctionClass struct {
baseFunctionClass
}

// getFunction builds the builtinVitessHashSig signature for VITESS_HASH.
// It verifies the argument list, declares a string-typed result whose single
// argument keeps its own evaluation type, and tags the signature with
// tipb.ScalarFuncSig_VitessHash. Any verification or construction error is
// returned unchanged.
func (c *vitessHashFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
	err := c.verifyArgs(args)
	if err != nil {
		return nil, err
	}

	// The argument is evaluated with whatever eval type it already has;
	// evalString dispatches on that type at run time.
	shardKeyTp := args[0].GetType().EvalType()
	base, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETString, shardKeyTp)
	if err != nil {
		return nil, err
	}

	// The hash is always 8 bytes long; the result type is flagged through
	// SetBinChsClnFlag.
	base.tp.Flen = 8
	types.SetBinChsClnFlag(base.tp)

	hashSig := &builtinVitessHashSig{baseBuiltinFunc: base}
	hashSig.setPbCode(tipb.ScalarFuncSig_VitessHash)
	return hashSig, nil
}

// builtinVitessHashSig is the builtin function signature that evaluates
// VITESS_HASH. It embeds baseBuiltinFunc and adds no fields of its own.
type builtinVitessHashSig struct {
baseBuiltinFunc
}

// Clone returns a new builtinVitessHashSig whose embedded baseBuiltinFunc is
// populated from the receiver via cloneFrom.
func (b *builtinVitessHashSig) Clone() builtinFunc {
	cloned := new(builtinVitessHashSig)
	cloned.cloneFrom(&b.baseBuiltinFunc)
	return cloned
}

// evalString evaluates VITESS_HASH(int64|string|decimal) for a single row.
// It dispatches on the evaluation type of the first argument: decimal and
// integer arguments are hashed with vitess.VitessHashUint64, string arguments
// with vitess.VitessHash. A NULL argument, or any evaluation, conversion or
// hashing error, yields ("", true, err); an argument of any other evaluation
// type yields errIncorrectArgs for "VITESS_HASH".
func (b *builtinVitessHashSig) evalString(row chunk.Row) (string, bool, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we implement the vectorized expression function for builtinVitessHashSig in expression/builtin_miscellaneous_vec.go?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Reminiscent Could you explain what are "vectorized expression functions" in this context?

Copy link
Contributor

@Reminiscent Reminiscent Dec 4, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Reminiscent Could you explain what are "vectorized expression functions" in this context?

You can see this blog for more information. In short, the expression function builtinVitessHashSig.evalString here processes one row of data at a time. And if you implement the vectorized expression function for builtinVitessHashSig.vecEvalString in expression/builtin_miscellaneous_vec.go, it can process a batch at a time. The blog introduces more information.

// The argument's evaluation type selects which Eval* method is used below.
argTp := b.args[0].GetType().EvalType()
switch argTp {
case types.ETDecimal:
// Decimal shard keys are converted with ToUint before hashing; a failed
// conversion is returned as an error.
shardKeyDec, isNull, err := b.args[0].EvalDecimal(b.ctx, row)
if isNull || err != nil {
return "", true, err
}
shardKeyUint, err := shardKeyDec.ToUint()
if err != nil {
return "", true, err
}
hashed, err := vitess.VitessHashUint64(shardKeyUint)
if err != nil {
return "", true, err
}
return string(hashed), false, nil
case types.ETString:
// String shard keys are hashed from their raw bytes.
shardKeyStr, isNull, err := b.args[0].EvalString(b.ctx, row)
if isNull || err != nil {
return "", true, err
}
hashed, err := vitess.VitessHash([]byte(shardKeyStr))
if err != nil {
return "", true, err
}
return string(hashed), false, nil
case types.ETInt:
// The int64 result is reinterpreted as uint64 (no range check) before
// hashing.
shardKeyInt, isNull, err := b.args[0].EvalInt(b.ctx, row)
if isNull || err != nil {
return "", true, err
}
hashed, err := vitess.VitessHashUint64(uint64(shardKeyInt))
if err != nil {
return "", true, err
}
return string(hashed), false, nil
default:
// Every other evaluation type is rejected.
return "", true, errIncorrectArgs.GenWithStackByArgs("VITESS_HASH")
}
}
15 changes: 15 additions & 0 deletions expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7656,3 +7656,18 @@ func (s *testIntegrationSerialSuite) TestIssue20608(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustQuery("select '䇇Հ' collate utf8mb4_bin like '___Հ';").Check(testkit.Rows("0"))
}

// TestVitessHash runs VITESS_HASH through SQL with integer, hex-literal and
// decimal arguments and checks each result against a fixed 8-byte value.
func (s *testIntegrationSuite) TestVitessHash(c *C) {
defer s.cleanEnv(c)
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
// NOTE(review): every query below selects "from t", but this test never
// creates table t. The expectations assume t exists and holds exactly one
// row — TODO confirm, or drop the FROM clause.
tk.MustQuery("select vitess_hash(30375298039) from t").Check(testkit.Rows("\x03\x12\x65\x66\x1E\x5F\x11\x33"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we create a table t first?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not create t for the reason that there are many other function tests that do not do that. For example tests for str_to_date, hour, time, etc. If you think it's important - I will add t.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to add some statements like

tk.MustExec("drop table if exists t;")
tk.MustExec("create table t(a int);")

for safty.

// Same as previous but passed as a binary string
tk.MustQuery("select vitess_hash(x'00000007128243F7') from t").Check(testkit.Rows("\x03\x12\x65\x66\x1E\x5F\x11\x33"))
// Less bits, should be prefixed with zeroes
tk.MustQuery("select vitess_hash(x'07128243F7') from t").Check(testkit.Rows("\x03\x12\x65\x66\x1E\x5F\x11\x33"))
// Further integer arguments.
tk.MustQuery("select vitess_hash(1123) from t").Check(testkit.Rows("\x03\x1B\x56\x5D\x41\xBD\xF8\xCA"))
tk.MustQuery("select vitess_hash(30573721600) from t").Check(testkit.Rows("\x1E\xFD\x64\x39\xF2\x05\x0F\xFD"))
// Decimal argument.
tk.MustQuery("select vitess_hash(convert(116, decimal(8,4))) from t").Check(testkit.Rows("\x1E\x17\x88\xFF\x0F\xDE\x09\x3C"))
// Largest uint64 value.
tk.MustQuery(fmt.Sprintf("select vitess_hash(%d) from t", uint64(math.MaxUint64))).Check(testkit.Rows("\x35\x55\x50\xB2\x15\x0E\x24\x51"))
}
59 changes: 59 additions & 0 deletions util/vitess/vitess_hash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package vitess

import (
"crypto/cipher"
"crypto/des"
"encoding/binary"
"encoding/hex"
"fmt"

"github.com/pingcap/errors"
)

// nullKeyBlock is the DES cipher keyed with eight zero bytes. It is built once
// in init and used by VitessHash.
var nullKeyBlock cipher.Block

// init constructs nullKeyBlock and panics if the DES cipher cannot be created.
func init() {
	var err error
	nullKeyBlock, err = des.NewCipher(make([]byte, 8))
	if err != nil {
		panic(errors.Trace(err))
	}
}

// VitessHash implements vitess' method of calculating a hash used for
// determining a shard key range: the shard key is encrypted as one DES block
// (64 bit block) with the all-zero 64 bit key held in nullKeyBlock.
//
// shardKey may be at most des.BlockSize (8) bytes long. Shorter keys are
// left-padded with zero bytes to a full block, so x'07128243F7' hashes the
// same as x'00000007128243F7'. Longer keys are rejected with an error.
//
// The returned slice is freshly allocated, always 8 bytes long, and never
// aliases shardKey.
func VitessHash(shardKey []byte) ([]byte, error) {
	if len(shardKey) > des.BlockSize {
		return nil, fmt.Errorf("shard key is too long: %v", hex.EncodeToString(shardKey))
	}

	// Right-align the key inside a zeroed block. For a full 8-byte key the
	// offset is 0 and this is a plain copy, so one path serves both cases.
	var keybytes, hashed [des.BlockSize]byte
	copy(keybytes[len(keybytes)-len(shardKey):], shardKey)
	nullKeyBlock.Encrypt(hashed[:], keybytes[:])
	return hashed[:], nil
}

// VitessHashUint64 hashes an unsigned 64 bit shard key the vitess way.
// The key is serialized as 8 big-endian bytes and passed to VitessHash, so
// the result and error are exactly what VitessHash returns for those bytes.
func VitessHashUint64(shardKey uint64) ([]byte, error) {
	encoded := make([]byte, 8)
	binary.BigEndian.PutUint64(encoded, shardKey)
	return VitessHash(encoded)
}
76 changes: 76 additions & 0 deletions util/vitess/vitess_hash_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package vitess

import (
"encoding/hex"
bezmax marked this conversation as resolved.
Show resolved Hide resolved
"math"
"strings"
"testing"

. "github.com/pingcap/check"
"github.com/pingcap/tidb/util/testleak"
)

// Register testVitessSuite with gocheck so that TestingT (called from TestT)
// runs its test methods.
var _ = Suite(&testVitessSuite{})

// TestT is the go test entry point; it hands control to gocheck's TestingT.
func TestT(t *testing.T) {
TestingT(t)
}

// testVitessSuite is the gocheck suite holding the vitess hash tests.
// It carries no state.
type testVitessSuite struct {
}

// toHex renders buf as an upper-case hexadecimal string, two characters per
// byte, so hashes can be compared against readable literals.
func toHex(buf []byte) string {
	lower := hex.EncodeToString(buf)
	return strings.ToUpper(lower)
}

// fromHex decodes a hexadecimal string into its bytes. It panics with the
// decoding error when hexStr is not valid hex; it is a helper for literal
// inputs in this test file.
func fromHex(hexStr string) []byte {
	buf, err := hex.DecodeString(hexStr)
	if err != nil {
		panic(err)
	}
	return buf
}

var _ = Suite(&testVitessSuite{})

func (s *testVitessSuite) TestVitessHash(c *C) {
defer testleak.AfterTest(c)()

hashed, err := VitessHashUint64(30375298039)
c.Assert(err, IsNil)
c.Assert(toHex(hashed), Equals, "031265661E5F1133")

// Same as previous value but passed as a []byte instead
hashed, err = VitessHash(fromHex("00000007128243F7"))
c.Assert(err, IsNil)
c.Assert(toHex(hashed), Equals, "031265661E5F1133")

hashed, err = VitessHashUint64(1123)
c.Assert(err, IsNil)
c.Assert(toHex(hashed), Equals, "031B565D41BDF8CA")

hashed, err = VitessHashUint64(30573721600)
c.Assert(err, IsNil)
c.Assert(toHex(hashed), Equals, "1EFD6439F2050FFD")

hashed, err = VitessHashUint64(116)
c.Assert(err, IsNil)
c.Assert(toHex(hashed), Equals, "1E1788FF0FDE093C")

hashed, err = VitessHashUint64(math.MaxUint64)
c.Assert(err, IsNil)
c.Assert(toHex(hashed), Equals, "355550B2150E2451")
}