Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add Extract and Unquote functions for JSON. #3353

Merged
merged 28 commits into from
Jun 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions util/types/json/functions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package json

import (
	"bytes"
	"fmt"
	"strconv"
	"unicode/utf8"

	"github.com/juju/errors"
)

// Type returns the type of the JSON value as an upper-case name:
// "OBJECT", "ARRAY", "NULL", "BOOLEAN", "INTEGER", "DOUBLE" or "STRING".
// It panics with unknownTypeCodeErrorMsg when the type code is not one of
// the codes handled below.
func (j JSON) Type() string {
	switch j.typeCode {
	case typeCodeString:
		return "STRING"
	case typeCodeFloat64:
		return "DOUBLE"
	case typeCodeInt64:
		return "INTEGER"
	case typeCodeLiteral:
		// The literal itself is kept in the low byte of i64; every literal
		// other than nil is reported as a boolean.
		if byte(j.i64) == jsonLiteralNil {
			return "NULL"
		}
		return "BOOLEAN"
	case typeCodeArray:
		return "ARRAY"
	case typeCodeObject:
		return "OBJECT"
	}
	panic(fmt.Sprintf(unknownTypeCodeErrorMsg, j.typeCode))
}

// Extract receives several path expressions as arguments, matches them in j, and returns:
// ret: target JSON matched any path expressions. maybe autowrapped as an array.
// found: true if any path expressions matched.
func (j JSON) Extract(pathExprList []PathExpression) (ret JSON, found bool) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How to distinguish the returned array is one of matched path, or a wrapped array?
Does it matters?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. I will test select json_extract('{"a": [1, 2]}', '$.a') and select json_extract('{"a": [1, 2]}', "$.a[0]", "$.a[1]").

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these two statements have same result value on MySQL 5.7, so it seems we cannot distinguish them, and we don't need, either.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

elemList := make([]JSON, 0, len(pathExprList))
for _, pathExpr := range pathExprList {
elemList = append(elemList, extract(j, pathExpr)...)
}
if len(elemList) == 0 {
found = false
} else if len(pathExprList) == 1 && len(elemList) == 1 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems len(elemList) will always be 1 here ?

Copy link
Contributor Author

@hicqu hicqu May 31, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if pathExpr contains any asterisks, len(elemList) won't be 1 even if len(pathExprList) equals to 1.

// If pathExpr contains asterisks, len(elemList) won't be 1
// even if len(pathExprList) equals to 1.
found = true
ret = elemList[0]
} else {
found = true
ret.typeCode = typeCodeArray
ret.array = append(ret.array, elemList...)
}
return
}

// Unquote is for JSON_UNQUOTE.
// A string value goes through unquoteString, which resolves its escape
// sequences and may fail; any other value is returned as rendered by String.
func (j JSON) Unquote() (string, error) {
	if j.typeCode == typeCodeString {
		return unquoteString(j.str)
	}
	return j.String(), nil
}

// unquoteString recognizes the escape sequences shown in:
// https://dev.mysql.com/doc/refman/5.7/en/json-modification-functions.html#json-unquote-character-escape-sequences
//
// It returns s with every backslash escape replaced by the character it
// denotes. A \uXXXX escape is replaced by the UTF-8 encoding of the code point
// given by its four hexadecimal digits. A backslash followed by any other
// character yields that character unchanged.
//
// An error is returned when s ends with a lone backslash, or when \u is not
// followed by exactly four hexadecimal digits.
func unquoteString(s string) (string, error) {
	ret := new(bytes.Buffer)
	for i := 0; i < len(s); i++ {
		if s[i] != '\\' {
			ret.WriteByte(s[i])
			continue
		}
		// Step onto the character that follows the backslash.
		i++
		if i == len(s) {
			return "", errors.New("Missing a closing quotation mark in string")
		}
		switch s[i] {
		case '"':
			ret.WriteByte('"')
		case 'b':
			ret.WriteByte('\b')
		case 'f':
			ret.WriteByte('\f')
		case 'n':
			ret.WriteByte('\n')
		case 'r':
			ret.WriteByte('\r')
		case 't':
			ret.WriteByte('\t')
		case '\\':
			ret.WriteByte('\\')
		case 'u':
			// s[i] is 'u'; the four hex digits are s[i+1 : i+5].
			if i+4 >= len(s) {
				return "", errors.New("Invalid unicode")
			}
			// The digits must be parsed as a number: decoding the escape
			// text as UTF-8 would only ever yield the backslash itself.
			code, err := strconv.ParseUint(s[i+1:i+5], 16, 32)
			if err != nil {
				return "", errors.New("Invalid unicode")
			}
			var utf8Buf [utf8.UTFMax]byte
			size := utf8.EncodeRune(utf8Buf[:], rune(code))
			ret.Write(utf8Buf[:size])
			i += 4
		default:
			ret.WriteByte(s[i])
		}
	}
	return ret.String(), nil
}

// extract is used by Extract. It walks pathExpr one leg at a time and collects
// every value in j that the whole expression matches.
// NOTE: the return value will share something with j.
func extract(j JSON, pathExpr PathExpression) (ret []JSON) {
	// No legs left: j itself is the match.
	if len(pathExpr.legs) == 0 {
		return []JSON{j}
	}
	leg, rest := pathExpr.popOneLeg()
	switch {
	case leg.typ == pathLegIndex && j.typeCode == typeCodeArray:
		switch {
		case leg.arrayIndex == arrayIndexAsterisk:
			for _, elem := range j.array {
				ret = append(ret, extract(elem, rest)...)
			}
		case leg.arrayIndex < len(j.array):
			ret = append(ret, extract(j.array[leg.arrayIndex], rest)...)
		}
	case leg.typ == pathLegKey && j.typeCode == typeCodeObject:
		if len(leg.dotKey) == 1 && leg.dotKey[0] == '*' {
			// Visit members in sorted key order.
			for _, key := range getSortedKeys(j.object) {
				ret = append(ret, extract(j.object[key], rest)...)
			}
		} else if member, ok := j.object[leg.dotKey]; ok {
			ret = append(ret, extract(member, rest)...)
		}
	case leg.typ == pathLegDoubleAsterisk:
		// First try the remaining legs against j itself, then descend into
		// each child with the full expression, so the double asterisk
		// applies again one level down.
		ret = append(ret, extract(j, rest)...)
		switch j.typeCode {
		case typeCodeArray:
			for _, elem := range j.array {
				ret = append(ret, extract(elem, pathExpr)...)
			}
		case typeCodeObject:
			for _, key := range getSortedKeys(j.object) {
				ret = append(ret, extract(j.object[key], pathExpr)...)
			}
		}
	}
	return ret
}
105 changes: 105 additions & 0 deletions util/types/json/functions_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package json

import (
"bytes"

. "github.com/pingcap/check"
)

// TestJSONType checks that Type reports the expected type name for every kind
// of JSON value it distinguishes, including strings and both boolean literals.
func (s *testJSONSuite) TestJSONType(c *C) {
	var tests = []struct {
		In  string
		Out string
	}{
		{`{"a": "b"}`, "OBJECT"},
		{`["a", "b"]`, "ARRAY"},
		{`3`, "INTEGER"},
		{`3.0`, "DOUBLE"},
		{`"hello"`, "STRING"},
		{`null`, "NULL"},
		{`true`, "BOOLEAN"},
		{`false`, "BOOLEAN"},
	}
	for _, tt := range tests {
		j := mustParseFromString(tt.In)
		c.Assert(j.Type(), Equals, tt.Out)
	}
}

// TestJSONExtract runs Extract over two fixture documents with single and
// multiple path expressions, and compares the serialized result with the
// serialized expectation whenever a match is expected.
func (s *testJSONSuite) TestJSONExtract(c *C) {
	j1 := mustParseFromString(`{"a": [1, "2", {"aa": "bb"}, 4.0, {"aa": "cc"}], "b": true, "c": ["d"], "\"hello\"": "world"}`)
	j2 := mustParseFromString(`[{"a": 1, "b": true}, 3, 3.5, "hello, world", null, true]`)

	tests := []struct {
		j               JSON
		pathExprStrings []string
		expected        JSON
		found           bool
		err             error
	}{
		// test extract with only one path expression.
		{j1, []string{"$.a"}, j1.object["a"], true, nil},
		{j2, []string{"$.a"}, CreateJSON(nil), false, nil},
		{j1, []string{"$[0]"}, CreateJSON(nil), false, nil},
		{j2, []string{"$[0]"}, j2.array[0], true, nil},
		{j1, []string{"$.a[2].aa"}, CreateJSON("bb"), true, nil},
		{j1, []string{"$.a[*].aa"}, mustParseFromString(`["bb", "cc"]`), true, nil},
		{j1, []string{"$.*[0]"}, mustParseFromString(`[1, "d"]`), true, nil},
		{j1, []string{`$.a[*]."aa"`}, mustParseFromString(`["bb", "cc"]`), true, nil},
		{j1, []string{`$."\"hello\""`}, mustParseFromString(`"world"`), true, nil},
		{j1, []string{`$**[0]`}, mustParseFromString(`[1, "d"]`), true, nil},

		// test extract with multi path expressions.
		{j1, []string{"$.a", "$[0]"}, mustParseFromString(`[[1, "2", {"aa": "bb"}, 4.0, {"aa": "cc"}]]`), true, nil},
		{j2, []string{"$.a", "$[0]"}, mustParseFromString(`[{"a": 1, "b": true}]`), true, nil},
	}

	for _, tt := range tests {
		// Every path string in the table must parse.
		var pathExprList = make([]PathExpression, 0)
		for _, peStr := range tt.pathExprStrings {
			pe, err := ParseJSONPathExpr(peStr)
			c.Assert(err, IsNil)
			pathExprList = append(pathExprList, pe)
		}

		got, found := tt.j.Extract(pathExprList)
		c.Assert(found, Equals, tt.found)
		if !found {
			// The returned value is not checked when nothing matched.
			continue
		}
		gotBytes := Serialize(got)
		wantBytes := Serialize(tt.expected)
		c.Assert(bytes.Equal(gotBytes, wantBytes), Equals, true)
	}
}

// TestJSONUnquote parses each JSON text and checks that Unquote succeeds and
// yields the expected string.
func (s *testJSONSuite) TestJSONUnquote(c *C) {
	tests := []struct {
		j        string
		unquoted string
	}{
		{j: `3`, unquoted: "3"},
		{j: `"3"`, unquoted: "3"},
		{j: `"hello, \"escaped quotes\" world"`, unquoted: "hello, \"escaped quotes\" world"},
		{j: "\"\\u4f60\"", unquoted: "你"},
		{j: `true`, unquoted: "true"},
		{j: `null`, unquoted: "null"},
		{j: `{"a": [1, 2]}`, unquoted: `{"a":[1,2]}`},
	}
	for _, tc := range tests {
		parsed := mustParseFromString(tc.j)
		got, err := parsed.Unquote()
		c.Assert(err, IsNil)
		c.Assert(got, Equals, tc.unquoted)
	}
}
26 changes: 0 additions & 26 deletions util/types/json/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,32 +123,6 @@ func (j JSON) String() string {
return strings.TrimSpace(hack.String(bytes))
}

// Type returns the type of the JSON value as an upper-case name:
// "OBJECT", "ARRAY", "NULL", "BOOLEAN", "INTEGER", "DOUBLE" or "STRING".
// It panics with unknownTypeCodeErrorMsg when the type code is not one of
// the codes handled below.
func (j JSON) Type() string {
	switch j.typeCode {
	case typeCodeString:
		return "STRING"
	case typeCodeFloat64:
		return "DOUBLE"
	case typeCodeInt64:
		return "INTEGER"
	case typeCodeLiteral:
		// The literal itself is kept in the low byte of i64; every literal
		// other than nil is reported as a boolean.
		if byte(j.i64) == jsonLiteralNil {
			return "NULL"
		}
		return "BOOLEAN"
	case typeCodeArray:
		return "ARRAY"
	case typeCodeObject:
		return "OBJECT"
	}
	panic(fmt.Sprintf(unknownTypeCodeErrorMsg, j.typeCode))
}

var (
// ErrInvalidJSONText means invalid JSON text.
ErrInvalidJSONText = terror.ClassJSON.New(mysql.ErrInvalidJSONText, mysql.MySQLErrName[mysql.ErrInvalidJSONText])
Expand Down
Loading