From 8f0f8ea0453a0dc2287aac0a2dda3f20041e0d48 Mon Sep 17 00:00:00 2001 From: Zhao Xin Date: Wed, 15 Apr 2020 10:38:01 +0800 Subject: [PATCH] =?UTF-8?q?expression:=20implement=20vectorized=20evaluati?= =?UTF-8?q?on=20for=20`builtinJSONSe=E2=80=A6=20(#15859)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- expression/builtin_json.go | 38 +---------- expression/builtin_json_test.go | 2 + expression/builtin_json_vec.go | 97 ++++++++++++++++++++++++++++- expression/builtin_json_vec_test.go | 9 ++- types/json/binary_functions.go | 42 +++++++++++++ 5 files changed, 150 insertions(+), 38 deletions(-) diff --git a/expression/builtin_json.go b/expression/builtin_json.go index f2f2bb0198d4c..c8dc6fa8554e5 100644 --- a/expression/builtin_json.go +++ b/expression/builtin_json.go @@ -26,7 +26,6 @@ import ( "github.com/pingcap/tidb/types/json" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/hack" - "github.com/pingcap/tidb/util/stringutil" "github.com/pingcap/tipb/go-tipb" ) @@ -1110,6 +1109,7 @@ func (b *builtinJSONSearchSig) evalJSON(row chunk.Row) (res json.BinaryJSON, isN if isNull || err != nil { return res, isNull, err } + containType = strings.ToLower(containType) if containType != json.ContainsPathAll && containType != json.ContainsPathOne { return res, true, errors.AddStack(json.ErrInvalidJSONContainsPathType) } @@ -1135,21 +1135,6 @@ func (b *builtinJSONSearchSig) evalJSON(row chunk.Row) (res json.BinaryJSON, isN return res, true, errIncorrectArgs.GenWithStackByArgs("ESCAPE") } } - patChars, patTypes := stringutil.CompilePattern(searchStr, escape) - - // result - result := make([]interface{}, 0) - - // walk json_doc - walkFn := func(fullpath json.PathExpression, bj json.BinaryJSON) (stop bool, err error) { - if bj.TypeCode == json.TypeCodeString && stringutil.DoMatch(string(bj.GetString()), patChars, patTypes) { - result = append(result, fullpath.String()) - if containType == json.ContainsPathOne { - return true, nil - } - } - return false, nil - } if len(b.args) >= 5 { // path... pathExprs := make([]json.PathExpression, 0, len(b.args)-4) for i := 4; i < len(b.args); i++ { @@ -1165,26 +1150,9 @@ func (b *builtinJSONSearchSig) evalJSON(row chunk.Row) (res json.BinaryJSON, isN } pathExprs = append(pathExprs, pathExpr) } - err = obj.Walk(walkFn, pathExprs...) - if err != nil { - return res, true, err - } - } else { - err = obj.Walk(walkFn) - if err != nil { - return res, true, err - } - } - - // return - switch len(result) { - case 0: - return res, true, nil - case 1: - return json.CreateBinary(result[0]), false, nil - default: - return json.CreateBinary(result), false, nil + return obj.Search(containType, searchStr, escape, pathExprs) } + return obj.Search(containType, searchStr, escape, nil) } type jsonStorageSizeFunctionClass struct { diff --git a/expression/builtin_json_test.go b/expression/builtin_json_test.go index 3a6481f8bf93e..0302e4f3c2d3d 100644 --- a/expression/builtin_json_test.go +++ b/expression/builtin_json_test.go @@ -791,6 +791,7 @@ func (s *testEvaluatorSuite) TestJSONSearch(c *C) { {[]interface{}{jsonString, `one`, `abc`}, `"$[0]"`, true}, {[]interface{}{jsonString, `all`, `abc`}, `["$[0]", "$[2].x"]`, true}, {[]interface{}{jsonString, `all`, `ghi`}, nil, true}, + {[]interface{}{jsonString, `ALL`, `ghi`}, nil, true}, {[]interface{}{jsonString, `all`, `10`}, `"$[1][0].k"`, true}, {[]interface{}{jsonString, `all`, `10`, nil, `$`}, `"$[1][0].k"`, true}, {[]interface{}{jsonString, `all`, `10`, nil, `$[*]`}, `"$[1][0].k"`, true}, @@ -825,6 +826,7 @@ func (s *testEvaluatorSuite) TestJSONSearch(c *C) { {[]interface{}{jsonString, `all`, `abc`, `??`}, nil, false}, // wrong escape_char {[]interface{}{jsonString, `all`, `abc`, nil, nil}, nil, true}, // NULL path {[]interface{}{jsonString, `all`, `abc`, nil, `$xx`}, nil, false}, // wrong path + {[]interface{}{jsonString, nil, `abc`}, nil, true}, } for _, t := range tbl { args := types.MakeDatums(t.input...) diff --git a/expression/builtin_json_vec.go b/expression/builtin_json_vec.go index 0a6213788f56c..d10eaf8c2442a 100644 --- a/expression/builtin_json_vec.go +++ b/expression/builtin_json_vec.go @@ -357,11 +357,104 @@ func (b *builtinJSONQuoteSig) vecEvalString(input *chunk.Chunk, result *chunk.Co } func (b *builtinJSONSearchSig) vectorized() bool { - return false + return true } func (b *builtinJSONSearchSig) vecEvalJSON(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + nr := input.NumRows() + jsonBuf, err := b.bufAllocator.get(types.ETJson, nr) + if err != nil { + return err + } + defer b.bufAllocator.put(jsonBuf) + if err := b.args[0].VecEvalJSON(b.ctx, input, jsonBuf); err != nil { + return err + } + typeBuf, err := b.bufAllocator.get(types.ETString, nr) + if err != nil { + return err + } + defer b.bufAllocator.put(typeBuf) + if err := b.args[1].VecEvalString(b.ctx, input, typeBuf); err != nil { + return err + } + searchBuf, err := b.bufAllocator.get(types.ETString, nr) + if err != nil { + return err + } + defer b.bufAllocator.put(searchBuf) + if err := b.args[2].VecEvalString(b.ctx, input, searchBuf); err != nil { + return err + } + + var escapeBuf *chunk.Column + if len(b.args) >= 4 { + escapeBuf, err = b.bufAllocator.get(types.ETString, nr) + if err != nil { + return err + } + defer b.bufAllocator.put(escapeBuf) + if err := b.args[3].VecEvalString(b.ctx, input, escapeBuf); err != nil { + return err + } + } + + var pathBufs []*chunk.Column + if len(b.args) >= 5 { + pathBufs = make([]*chunk.Column, (len(b.args) - 4)) + for i := 4; i < len(b.args); i++ { + index := i - 4 + pathBufs[index], err = b.bufAllocator.get(types.ETString, nr) + if err != nil { + return err + } + defer b.bufAllocator.put(pathBufs[index]) + if err := b.args[i].VecEvalString(b.ctx, input, pathBufs[index]); err != nil { + return err + } + } + } + + result.ReserveJSON(nr) + + for i := 0; i < nr; i++ { + if jsonBuf.IsNull(i) || searchBuf.IsNull(i) || typeBuf.IsNull(i) { + result.AppendNull() + continue + } + containType := strings.ToLower(typeBuf.GetString(i)) + escape := byte('\\') + if escapeBuf != nil && !escapeBuf.IsNull(i) { + escapeStr := escapeBuf.GetString(i) + if len(escapeStr) == 0 { + escape = byte('\\') + } else if len(escapeStr) == 1 { + escape = byte(escapeStr[0]) + } else { + return errIncorrectArgs.GenWithStackByArgs("ESCAPE") + } + } + var pathExprs []json.PathExpression + if pathBufs != nil { + pathExprs = make([]json.PathExpression, 0, len(b.args)-4) + for j := 0; j < len(b.args)-4; j++ { + if pathBufs[j].IsNull(i) { + break + } + pathExpr, err := json.ParseJSONPathExpr(pathBufs[j].GetString(i)) + if err != nil { + return json.ErrInvalidJSONPath.GenWithStackByArgs(pathBufs[j].GetString(i)) + } + pathExprs = append(pathExprs, pathExpr) + } + } + bj, _, err := jsonBuf.GetJSON(i).Search(containType, searchBuf.GetString(i), escape, pathExprs) + if err != nil { + return err + } + result.AppendJSON(bj) + } + return nil } func (b *builtinJSONSetSig) vectorized() bool { diff --git a/expression/builtin_json_vec_test.go b/expression/builtin_json_vec_test.go index 663659582c384..9ea511694db76 100644 --- a/expression/builtin_json_vec_test.go +++ b/expression/builtin_json_vec_test.go @@ -93,7 +93,14 @@ var vecBuiltinJSONCases = map[string][]vecExprBenchCase{ ast.JSONSet: { {retEvalType: types.ETJson, childrenTypes: []types.EvalType{types.ETJson, types.ETString, types.ETJson, types.ETString, types.ETJson}, geners: []dataGenerator{nil, &constStrGener{"$.key"}, nil, &constStrGener{"$.aaa"}, nil}}, }, - ast.JSONSearch: {}, + ast.JSONSearch: { + {retEvalType: types.ETJson, childrenTypes: []types.EvalType{types.ETJson, types.ETString, types.ETString}, geners: []dataGenerator{&constJSONGener{"[\"abc\", [{\"k\": \"10\"}, \"def\"], {\"x\":\"abc\"}, {\"y\":\"bcd\"}]"}, &constStrGener{"one"}, &constStrGener{"abc"}}}, + {retEvalType: types.ETJson, childrenTypes: []types.EvalType{types.ETJson, types.ETString, types.ETString}, geners: []dataGenerator{&constJSONGener{"[\"abc\", [{\"k\": \"10\"}, \"def\"], {\"x\":\"abc\"}, {\"y\":\"bcd\"}]"}, &constStrGener{"all"}, &constStrGener{"abc"}}}, + {retEvalType: types.ETJson, childrenTypes: []types.EvalType{types.ETJson, types.ETString, types.ETString}, geners: []dataGenerator{&constJSONGener{"[\"abc\", [{\"k\": \"10\"}, \"def\"], {\"x\":\"abc\"}, {\"y\":\"bcd\"}]"}, &constStrGener{"all"}, &constStrGener{"%a%"}}}, + {retEvalType: types.ETJson, childrenTypes: []types.EvalType{types.ETJson, types.ETString, types.ETString, types.ETString, types.ETString}, geners: []dataGenerator{&constJSONGener{"[\"abc\", [{\"k\": \"10\"}, \"def\"], {\"x\":\"abc\"}, {\"y\":\"bcd\"}]"}, &constStrGener{"all"}, &constStrGener{"%a%"}, &constStrGener{}, &constStrGener{"$"}}}, + {retEvalType: types.ETJson, childrenTypes: []types.EvalType{types.ETJson, types.ETString, types.ETString, types.ETString}, geners: []dataGenerator{&constJSONGener{"[\"abc\", [{\"k\": \"10\"}, \"def\"], {\"x\":\"abc\"}, {\"y\":\"bcd\"}]"}, &constStrGener{"all"}, &constStrGener{"%a%"}, &constStrGener{}}}, + {retEvalType: types.ETJson, childrenTypes: []types.EvalType{types.ETJson, types.ETString, types.ETString, types.ETString, types.ETString}, geners: []dataGenerator{&constJSONGener{"[\"abc\", [{\"k\": \"10\"}, \"def\"], {\"x\":\"abc\"}, {\"y\":\"bcd\"}]"}, &constStrGener{"all"}, &constStrGener{"10"}, &constStrGener{""}, &constStrGener{"$[1][0]"}}}, + }, ast.JSONReplace: { {retEvalType: types.ETJson, childrenTypes: []types.EvalType{types.ETJson, types.ETString, types.ETJson}, geners: []dataGenerator{nil, &constStrGener{"$.key"}, nil}}, }, diff --git a/types/json/binary_functions.go b/types/json/binary_functions.go index b936b0eafd765..f621c02e5776a 100644 --- a/types/json/binary_functions.go +++ b/types/json/binary_functions.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tidb/util/hack" + "github.com/pingcap/tidb/util/stringutil" ) // Type returns type of BinaryJSON as string. @@ -880,6 +881,47 @@ func (bj BinaryJSON) GetElemDepth() int { } } +// Search for JSON_Search +// rules referenced by MySQL JSON_SEARCH function +// [https://dev.mysql.com/doc/refman/5.7/en/json-search-functions.html#function_json-search] +func (bj BinaryJSON) Search(containType string, search string, escape byte, pathExpres []PathExpression) (res BinaryJSON, isNull bool, err error) { + if containType != ContainsPathOne && containType != ContainsPathAll { + return res, true, ErrInvalidJSONPath + } + patChars, patTypes := stringutil.CompilePattern(search, escape) + + result := make([]interface{}, 0) + walkFn := func(fullpath PathExpression, bj BinaryJSON) (stop bool, err error) { + if bj.TypeCode == TypeCodeString && stringutil.DoMatch(string(bj.GetString()), patChars, patTypes) { + result = append(result, fullpath.String()) + if containType == ContainsPathOne { + return true, nil + } + } + return false, nil + } + if len(pathExpres) != 0 { + err := bj.Walk(walkFn, pathExpres...) + if err != nil { + return res, true, err + } + } else { + err := bj.Walk(walkFn) + if err != nil { + return res, true, err + } + } + switch len(result) { + case 0: + return res, true, nil + case 1: + return CreateBinary(result[0]), false, nil + default: + return CreateBinary(result), false, nil + } + +} + // extractCallbackFn: the type of CALLBACK function for extractToCallback type extractCallbackFn func(fullpath PathExpression, bj BinaryJSON) (stop bool, err error)