-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add Extract and Unquote functions for JSON. #3353
Changes from all commits
debac63
d58d95c
307b974
a4b2857
8f48d03
a478fe7
8423c01
e45591a
6f47e2a
5520fe1
725b0b8
9f34da5
c60e60e
9ff1d42
aff5ee6
d18073a
11465d1
b49429b
b504ec6
11da2b1
5057d7c
2042f1c
af0ca13
9e04b3c
4dddaa8
cb4cd7a
67e3677
5678eec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,167 @@ | ||||
// Copyright 2017 PingCAP, Inc. | ||||
// | ||||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||||
// you may not use this file except in compliance with the License. | ||||
// You may obtain a copy of the License at | ||||
// | ||||
// http://www.apache.org/licenses/LICENSE-2.0 | ||||
// | ||||
// Unless required by applicable law or agreed to in writing, software | ||||
// distributed under the License is distributed on an "AS IS" BASIS, | ||||
// See the License for the specific language governing permissions and | ||||
// limitations under the License. | ||||
|
||||
package json | ||||
|
||||
import (
	"bytes"
	"fmt"
	"strconv"
	"unicode/utf8"

	"github.com/juju/errors"
)
|
||||
// Type returns type of JSON as string. | ||||
func (j JSON) Type() string { | ||||
switch j.typeCode { | ||||
case typeCodeObject: | ||||
return "OBJECT" | ||||
case typeCodeArray: | ||||
return "ARRAY" | ||||
case typeCodeLiteral: | ||||
switch byte(j.i64) { | ||||
case jsonLiteralNil: | ||||
return "NULL" | ||||
default: | ||||
return "BOOLEAN" | ||||
} | ||||
case typeCodeInt64: | ||||
return "INTEGER" | ||||
case typeCodeFloat64: | ||||
return "DOUBLE" | ||||
case typeCodeString: | ||||
return "STRING" | ||||
default: | ||||
msg := fmt.Sprintf(unknownTypeCodeErrorMsg, j.typeCode) | ||||
panic(msg) | ||||
} | ||||
} | ||||
|
||||
// Extract receives several path expressions as arguments, matches them in j, and returns: | ||||
// ret: target JSON matched any path expressions. maybe autowrapped as an array. | ||||
// found: true if any path expressions matched. | ||||
func (j JSON) Extract(pathExprList []PathExpression) (ret JSON, found bool) { | ||||
elemList := make([]JSON, 0, len(pathExprList)) | ||||
for _, pathExpr := range pathExprList { | ||||
elemList = append(elemList, extract(j, pathExpr)...) | ||||
} | ||||
if len(elemList) == 0 { | ||||
found = false | ||||
} else if len(pathExprList) == 1 && len(elemList) == 1 { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if pathExpr contains any asterisks, |
||||
// If pathExpr contains asterisks, len(elemList) won't be 1 | ||||
// even if len(pathExprList) equals to 1. | ||||
found = true | ||||
ret = elemList[0] | ||||
} else { | ||||
found = true | ||||
ret.typeCode = typeCodeArray | ||||
ret.array = append(ret.array, elemList...) | ||||
} | ||||
return | ||||
} | ||||
|
||||
// Unquote is for JSON_UNQUOTE. | ||||
func (j JSON) Unquote() (string, error) { | ||||
switch j.typeCode { | ||||
case typeCodeString: | ||||
return unquoteString(j.str) | ||||
default: | ||||
return j.String(), nil | ||||
} | ||||
} | ||||
|
||||
// unquoteString resolves the backslash escape sequences in s and returns the
// resulting text. The recognized sequences are the ones shown in:
// https://dev.mysql.com/doc/refman/5.7/en/json-modification-functions.html#json-unquote-character-escape-sequences
//
//   - \" \b \f \n \r \t \\ become the corresponding single byte.
//   - \uXXXX is read as four hexadecimal digits and replaced by the UTF-8
//     encoding of that code point.
//   - A backslash followed by any other byte yields that byte.
//
// Bytes outside an escape sequence are copied unchanged, so multi-byte UTF-8
// text in s passes through untouched.
//
// An error is returned when s ends with a lone backslash, or when \u is not
// followed by exactly four hexadecimal digits.
//
// NOTE(review): a reviewer suggested reusing the unescape helper in
// tidb/util/stringutil/string_util.go instead; that helper is not visible
// from this file, so the escapes are handled locally.
func unquoteString(s string) (string, error) {
	ret := new(bytes.Buffer)
	for i := 0; i < len(s); i++ {
		if s[i] != '\\' {
			ret.WriteByte(s[i])
			continue
		}

		// Step onto the byte that follows the backslash.
		i++
		if i == len(s) {
			return "", errors.New("Missing a closing quotation mark in string")
		}
		switch s[i] {
		case '"':
			ret.WriteByte('"')
		case 'b':
			ret.WriteByte('\b')
		case 'f':
			ret.WriteByte('\f')
		case 'n':
			ret.WriteByte('\n')
		case 'r':
			ret.WriteByte('\r')
		case 't':
			ret.WriteByte('\t')
		case '\\':
			ret.WriteByte('\\')
		case 'u':
			// s[i] is 'u'; the four hex digits occupy s[i+1 : i+5].
			if i+4 >= len(s) {
				return "", errors.New("Invalid unicode")
			}
			// Parse the digits themselves. Decoding the literal text
			// `\uXXXX` as UTF-8 would only ever yield the backslash.
			code, err := strconv.ParseUint(s[i+1:i+5], 16, 16)
			if err != nil {
				return "", errors.New("Invalid unicode")
			}
			// A lone surrogate is not paired with a following \u escape;
			// utf8.EncodeRune writes U+FFFD for such invalid runes.
			var utf8Buf [utf8.UTFMax]byte
			size := utf8.EncodeRune(utf8Buf[:], rune(code))
			ret.Write(utf8Buf[:size])
			i += 4
		default:
			ret.WriteByte(s[i])
		}
	}
	return ret.String(), nil
}
|
||||
// extract is used by Extract. | ||||
// NOTE: the return value will share something with j. | ||||
func extract(j JSON, pathExpr PathExpression) (ret []JSON) { | ||||
if len(pathExpr.legs) == 0 { | ||||
return []JSON{j} | ||||
} | ||||
currentLeg, subPathExpr := pathExpr.popOneLeg() | ||||
if currentLeg.typ == pathLegIndex && j.typeCode == typeCodeArray { | ||||
if currentLeg.arrayIndex == arrayIndexAsterisk { | ||||
for _, child := range j.array { | ||||
ret = append(ret, extract(child, subPathExpr)...) | ||||
} | ||||
} else if currentLeg.arrayIndex < len(j.array) { | ||||
childRet := extract(j.array[currentLeg.arrayIndex], subPathExpr) | ||||
ret = append(ret, childRet...) | ||||
} | ||||
} else if currentLeg.typ == pathLegKey && j.typeCode == typeCodeObject { | ||||
if len(currentLeg.dotKey) == 1 && currentLeg.dotKey[0] == '*' { | ||||
var sortedKeys = getSortedKeys(j.object) // iterate over sorted keys. | ||||
for _, child := range sortedKeys { | ||||
ret = append(ret, extract(j.object[child], subPathExpr)...) | ||||
} | ||||
} else if child, ok := j.object[currentLeg.dotKey]; ok { | ||||
childRet := extract(child, subPathExpr) | ||||
ret = append(ret, childRet...) | ||||
} | ||||
} else if currentLeg.typ == pathLegDoubleAsterisk { | ||||
ret = append(ret, extract(j, subPathExpr)...) | ||||
if j.typeCode == typeCodeArray { | ||||
for _, child := range j.array { | ||||
ret = append(ret, extract(child, pathExpr)...) | ||||
} | ||||
} else if j.typeCode == typeCodeObject { | ||||
var sortedKeys = getSortedKeys(j.object) | ||||
for _, child := range sortedKeys { | ||||
ret = append(ret, extract(j.object[child], pathExpr)...) | ||||
} | ||||
} | ||||
} | ||||
return | ||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
// Copyright 2017 PingCAP, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package json | ||
|
||
import ( | ||
"bytes" | ||
|
||
. "github.com/pingcap/check" | ||
) | ||
|
||
func (s *testJSONSuite) TestJSONType(c *C) { | ||
var tests = []struct { | ||
In string | ||
Out string | ||
}{ | ||
{`{"a": "b"}`, "OBJECT"}, | ||
{`["a", "b"]`, "ARRAY"}, | ||
{`3`, "INTEGER"}, | ||
{`3.0`, "DOUBLE"}, | ||
{`null`, "NULL"}, | ||
{`true`, "BOOLEAN"}, | ||
} | ||
for _, tt := range tests { | ||
j := mustParseFromString(tt.In) | ||
c.Assert(j.Type(), Equals, tt.Out) | ||
} | ||
} | ||
|
||
func (s *testJSONSuite) TestJSONExtract(c *C) { | ||
j1 := mustParseFromString(`{"a": [1, "2", {"aa": "bb"}, 4.0, {"aa": "cc"}], "b": true, "c": ["d"], "\"hello\"": "world"}`) | ||
j2 := mustParseFromString(`[{"a": 1, "b": true}, 3, 3.5, "hello, world", null, true]`) | ||
|
||
var tests = []struct { | ||
j JSON | ||
pathExprStrings []string | ||
expected JSON | ||
found bool | ||
err error | ||
}{ | ||
// test extract with only one path expression. | ||
{j1, []string{"$.a"}, j1.object["a"], true, nil}, | ||
{j2, []string{"$.a"}, CreateJSON(nil), false, nil}, | ||
{j1, []string{"$[0]"}, CreateJSON(nil), false, nil}, | ||
{j2, []string{"$[0]"}, j2.array[0], true, nil}, | ||
{j1, []string{"$.a[2].aa"}, CreateJSON("bb"), true, nil}, | ||
{j1, []string{"$.a[*].aa"}, mustParseFromString(`["bb", "cc"]`), true, nil}, | ||
{j1, []string{"$.*[0]"}, mustParseFromString(`[1, "d"]`), true, nil}, | ||
{j1, []string{`$.a[*]."aa"`}, mustParseFromString(`["bb", "cc"]`), true, nil}, | ||
{j1, []string{`$."\"hello\""`}, mustParseFromString(`"world"`), true, nil}, | ||
{j1, []string{`$**[0]`}, mustParseFromString(`[1, "d"]`), true, nil}, | ||
|
||
// test extract with multi path expressions. | ||
{j1, []string{"$.a", "$[0]"}, mustParseFromString(`[[1, "2", {"aa": "bb"}, 4.0, {"aa": "cc"}]]`), true, nil}, | ||
{j2, []string{"$.a", "$[0]"}, mustParseFromString(`[{"a": 1, "b": true}]`), true, nil}, | ||
} | ||
|
||
for _, tt := range tests { | ||
var pathExprList = make([]PathExpression, 0) | ||
for _, peStr := range tt.pathExprStrings { | ||
pe, err := ParseJSONPathExpr(peStr) | ||
c.Assert(err, IsNil) | ||
pathExprList = append(pathExprList, pe) | ||
} | ||
|
||
expected, found := tt.j.Extract(pathExprList) | ||
c.Assert(found, Equals, tt.found) | ||
if found { | ||
b1 := Serialize(expected) | ||
b2 := Serialize(tt.expected) | ||
c.Assert(bytes.Compare(b1, b2), Equals, 0) | ||
} | ||
} | ||
} | ||
|
||
func (s *testJSONSuite) TestJSONUnquote(c *C) { | ||
var tests = []struct { | ||
j string | ||
unquoted string | ||
}{ | ||
{j: `3`, unquoted: "3"}, | ||
{j: `"3"`, unquoted: "3"}, | ||
{j: `"hello, \"escaped quotes\" world"`, unquoted: "hello, \"escaped quotes\" world"}, | ||
{j: "\"\\u4f60\"", unquoted: "你"}, | ||
{j: `true`, unquoted: "true"}, | ||
{j: `null`, unquoted: "null"}, | ||
{j: `{"a": [1, 2]}`, unquoted: `{"a":[1,2]}`}, | ||
} | ||
for _, tt := range tests { | ||
j := mustParseFromString(tt.j) | ||
unquoted, err := j.Unquote() | ||
c.Assert(err, IsNil) | ||
c.Assert(unquoted, Equals, tt.unquoted) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How can a caller tell whether the returned array is itself one of the matched values, or an array that wraps the matches?
Does it matter?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good question. I will test
select json_extract('{"a": [1, 2]}', '$.a')
and select json_extract('{"a": [1, 2]}', "$.a[0]", "$.a[1]").
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These two statements return the same value on MySQL 5.7, so it seems we cannot distinguish them, and we do not need to either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok