Skip to content

Commit

Permalink
sql: support JSONB encoding and decoding for forward indexes
Browse files Browse the repository at this point in the history
Currently, it is not possible to create a primary or a secondary index
on a JSON column in CRDB. This is because forward indexes on JSONB
columns have not yet been implemented due to the lack of a valid
lexicographic ordering.

To address this, a key encoding strategy was developed for each JSON
value. In order to maintain a lexicographical ordering of the encodings
of JSON values, different marker bytes were defined in an order similar
to the order defined for these JSON values. Encodings for primitive JSON
values, such as Null, False and True, only consist of their marker
bytes.

e.g: To encode a JSON False:

`enc(false::JSONB) = [JSONB_False_Tag]`

Encodings for JSON Strings and Numbers consist of a concatenation of
their respective marker bytes, the encoding of the string or the number
in consideration and a terminator byte to indicate that the encoding for
the JSON value has ended.

e.g: To encode a JSON String '"a"':

`enc('"a"'::JSONB) = [JSONB_String_Tag, enc("a"), JSONB_Terminator_Tag]`

Encodings for JSON Arrays and Objects consist of a concatenation of
their respective marker bytes, the encoded count of elements/key-value
pairs present within the container, the encodings of the
elements present in the container, and a terminator tag to
indicate that the encoding for the given JSON container has ended.

e.g: To encode a JSON array '["a"]':

`enc('["a"]'::JSONB) = [JSONB_Array_Tag, enc(1), JSONB_String_Tag,
enc("a"), JSONB_Terminator_Tag, JSONB_Terminator_Tag]`

Epic: CRDB-24501
Fixes: #35706

Release note: None
  • Loading branch information
Shivs11 committed Mar 7, 2023
1 parent 2cc95c9 commit 657a649
Show file tree
Hide file tree
Showing 9 changed files with 600 additions and 37 deletions.
16 changes: 2 additions & 14 deletions pkg/sql/colexec/sorttopk.eg.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pkg/sql/rowenc/keyside/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ go_library(
"decode.go",
"doc.go",
"encode.go",
"json.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/rowenc/keyside",
visibility = ["//visibility:public"],
Expand All @@ -19,8 +20,8 @@ go_library(
"//pkg/util/bitarray",
"//pkg/util/duration",
"//pkg/util/encoding",
"//pkg/util/errorutil/unimplemented",
"//pkg/util/ipaddr",
"//pkg/util/json",
"//pkg/util/timetz",
"//pkg/util/timeutil/pgdate",
"//pkg/util/uuid",
Expand Down
13 changes: 9 additions & 4 deletions pkg/sql/rowenc/keyside/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/ipaddr"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/timetz"
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
Expand Down Expand Up @@ -117,13 +118,17 @@ func Decode(
d, err := a.NewDCollatedString(r, valType.Locale())
return d, rkey, err
case types.JsonFamily:
// Don't attempt to decode the JSON value. Instead, just return the
// remaining bytes of the key.
jsonLen, err := encoding.PeekLength(key)
var json json.JSON
var rKey []byte
var dJson tree.Datum
json, rKey, err = decodeJSONKey(key, dir)
if err != nil {
return nil, nil, err
}
return tree.DNull, key[jsonLen:], nil

dJson = tree.NewDJSON(json)
d := a.NewDJSON(*dJson.(*tree.DJSON))
return d, rKey, err
case types.BytesFamily:
var r []byte
if dir == encoding.Ascending {
Expand Down
3 changes: 1 addition & 2 deletions pkg/sql/rowenc/keyside/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ package keyside
import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -174,7 +173,7 @@ func Encode(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, error) {
// DEncodedKey carries an already encoded key.
return append(b, []byte(*t)...), nil
case *tree.DJSON:
return nil, unimplemented.NewWithIssue(35706, "unable to encode JSON as a table key")
return encodeJSONKey(b, val.(*tree.DJSON), dir)
}
return nil, errors.Errorf("unable to encode table key: %T", val)
}
176 changes: 176 additions & 0 deletions pkg/sql/rowenc/keyside/json.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package keyside

import (
"github.com/cockroachdb/apd/v3"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/errors"
)

// encodeJSONKey produces the forward-index key encoding of a JSON datum.
// It delegates to the wrapped JSON value's EncodeForwardIndex, passing b
// and dir through unchanged, and returns whatever buffer and error that
// call yields.
func encodeJSONKey(b []byte, json *tree.DJSON, dir encoding.Direction) ([]byte, error) {
	return json.JSON.EncodeForwardIndex(b, dir)
}

// decodeJSONKey decodes one forward-index-encoded JSON value from the front
// of buf and returns it together with the bytes that follow it.
//
// The leading marker byte is validated and consumed first; the marker type
// then selects how the remainder is decoded. Null, False and True carry no
// payload. Strings and Numbers are followed by a one-byte terminator, which
// is stripped here. Arrays and Objects are decoded recursively.
//
// Malformed input is reported through the returned error rather than a
// panic, so that a corrupt key cannot crash the caller. On error the
// returned JSON value and remaining bytes are nil.
func decodeJSONKey(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) {
	buf, typ, err := encoding.ValidateAndConsumeJSONKeyMarker(buf, dir)
	if err != nil {
		return nil, nil, err
	}

	var jsonVal json.JSON
	switch typ {
	case encoding.JSONNull, encoding.JSONNullDesc:
		jsonVal, err = json.MakeJSON(nil)
		if err != nil {
			return nil, nil, errors.Wrap(err, "could not decode JSON Null")
		}
	case encoding.JSONFalse, encoding.JSONFalseDesc:
		jsonVal, err = json.MakeJSON(false)
		if err != nil {
			return nil, nil, errors.Wrap(err, "could not decode JSON False")
		}
	case encoding.JSONTrue, encoding.JSONTrueDesc:
		jsonVal, err = json.MakeJSON(true)
		if err != nil {
			return nil, nil, errors.Wrap(err, "could not decode JSON True")
		}
	case encoding.JSONString, encoding.JSONStringDesc:
		jsonVal, buf, err = decodeJSONString(buf, dir)
		if err != nil {
			return nil, nil, err
		}
	case encoding.JSONNumber, encoding.JSONNumberDesc:
		var dec apd.Decimal
		if typ == encoding.JSONNumber {
			buf, dec, err = encoding.DecodeDecimalAscending(buf, nil)
		} else {
			buf, dec, err = encoding.DecodeDecimalDescending(buf, nil)
		}
		if err != nil {
			return nil, nil, errors.Wrap(err, "could not decode the JSON Number")
		}
		// A terminator byte must follow the decimal; guard the slice so a
		// truncated key yields an error instead of an out-of-range panic.
		if len(buf) == 0 {
			return nil, nil, errors.AssertionFailedf("invalid JSON Number encoding (unterminated)")
		}
		buf = buf[1:] // removing the terminator
		jsonVal = json.FromDecimal(dec)
	case encoding.JSONArray, encoding.JSONArrayDesc:
		jsonVal, buf, err = decodeJSONArray(buf, dir)
		if err != nil {
			return nil, nil, errors.Wrap(err, "could not decode the JSON Array")
		}
	case encoding.JSONObject, encoding.JSONObjectDesc:
		jsonVal, buf, err = decodeJSONObject(buf, dir)
		if err != nil {
			return nil, nil, errors.Wrap(err, "could not decode the JSON Object")
		}
	default:
		// Never hand back a nil JSON value with a nil error.
		return nil, nil, errors.AssertionFailedf("unexpected JSON key type: %v", typ)
	}

	return jsonVal, buf, nil
}

// decodeJSONString decodes the payload of a key-encoded JSON string (the
// marker byte has already been consumed by the caller). It returns the JSON
// string and the bytes remaining after the string's terminator byte.
//
// Any decoding failure, an unknown direction, or a missing terminator is
// reported through the returned error.
func decodeJSONString(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) {
	// The encoding carries a length prefix ahead of the string bytes. It is
	// consumed here but its value is not needed to decode the string.
	buf, _, err := encoding.DecodeJSONValueLength(buf, dir)
	if err != nil {
		return nil, nil, errors.Wrap(err, "could not decode the length of the JSON String")
	}

	var str string
	switch dir {
	case encoding.Ascending:
		buf, str, err = encoding.DecodeUnsafeStringAscendingDeepCopy(buf, nil)
	case encoding.Descending:
		buf, str, err = encoding.DecodeUnsafeStringDescending(buf, nil)
	default:
		return nil, nil, errors.AssertionFailedf("invalid direction: %d", dir)
	}
	if err != nil {
		return nil, nil, errors.Wrap(err, "could not decode the JSON string")
	}

	// A terminator byte must follow the string; guard the slice so a
	// truncated key yields an error instead of an out-of-range panic.
	if len(buf) == 0 {
		return nil, nil, errors.AssertionFailedf("invalid JSON String encoding (unterminated)")
	}
	buf = buf[1:] // removing the terminator

	jsonVal, err := json.MakeJSON(str)
	if err != nil {
		return nil, nil, errors.Wrap(err, "could not make a JSON String from the input string")
	}
	return jsonVal, buf, nil
}

// decodeJSONArray decodes the payload of a key-encoded JSON array (the
// marker byte has already been consumed by the caller). The payload is a
// length prefix followed by the encoded elements and a terminator byte.
//
// It returns the decoded array and the bytes remaining after the array's
// terminator. An undecodable length prefix, an undecodable element, or
// input that ends before the terminator is reported through the returned
// error, in which case the other results are nil.
func decodeJSONArray(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) {
	// Extract the total number of elements in the JSON array.
	buf, length, err := encoding.DecodeJSONValueLength(buf, dir)
	if err != nil {
		return nil, nil, errors.Wrap(err, "could not decode the length of the JSON Array")
	}

	// Pre-allocate the array builder with `length` number of JSON elements.
	jsonArray := json.NewArrayBuilder(int(length))

	for {
		if len(buf) == 0 {
			return nil, nil, errors.AssertionFailedf("invalid JSON array encoding (unterminated)")
		}
		if encoding.IsJSONKeyDone(buf, dir) {
			// Strip the array's terminator byte.
			return jsonArray.Build(), buf[1:], nil
		}

		var childElem json.JSON
		childElem, buf, err = decodeJSONKey(buf, dir)
		if err != nil {
			return nil, nil, err
		}
		jsonArray.Add(childElem)
	}
}

// decodeJSONObject decodes the payload of a key-encoded JSON object (the
// marker byte has already been consumed by the caller). The payload is a
// length prefix followed by alternating encoded keys and values and a
// terminator byte.
//
// It returns the decoded object and the bytes remaining after the object's
// terminator. An undecodable length prefix, an undecodable key or value, a
// key with no text representation, or input that ends before the terminator
// is reported through the returned error, in which case the other results
// are nil.
func decodeJSONObject(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) {
	// Extract the total number of key-value pairs in the JSON object.
	buf, length, err := encoding.DecodeJSONValueLength(buf, dir)
	if err != nil {
		return nil, nil, errors.Wrap(err, "could not decode the length of the JSON Object")
	}

	jsonObject := json.NewObjectBuilder(int(length))
	for {
		if len(buf) == 0 {
			return nil, nil, errors.AssertionFailedf("invalid JSON Object encoding (unterminated)")
		}
		if encoding.IsJSONKeyDone(buf, dir) {
			// JSONB Objects will have a terminator byte.
			return jsonObject.Build(), buf[1:], nil
		}

		// Each entry is an encoded key immediately followed by its encoded
		// value; a key without a value surfaces as a decode error below.
		var jsonKey, value json.JSON
		jsonKey, buf, err = decodeJSONKey(buf, dir)
		if err != nil {
			return nil, nil, err
		}

		key, err := jsonKey.AsText()
		if err != nil {
			return nil, nil, err
		}
		// AsText returns a pointer; guard against nil before dereferencing
		// so a non-string key cannot cause a nil-pointer panic.
		if key == nil {
			return nil, nil, errors.AssertionFailedf("invalid JSON Object encoding (key has no text value)")
		}

		value, buf, err = decodeJSONKey(buf, dir)
		if err != nil {
			return nil, nil, err
		}

		jsonObject.Add(*key, value)
	}
}
2 changes: 1 addition & 1 deletion pkg/sql/rowenc/keyside/keyside_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ func genEncodingDirection() gopter.Gen {
func hasKeyEncoding(typ *types.T) bool {
// Only some types are round-trip key encodable.
switch typ.Family() {
case types.JsonFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily,
case types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily,
types.GeographyFamily, types.GeometryFamily, types.TSVectorFamily, types.TSQueryFamily:
return false
case types.ArrayFamily:
Expand Down
Loading

0 comments on commit 657a649

Please sign in to comment.