Skip to content

Commit

Permalink
sql: support JSONB encoding and decoding for forward indexes
Browse files Browse the repository at this point in the history
Currently, it is not possible to create a primary or a secondary index
on a JSON column in CRDB. This is because forward indexes on JSONB
columns have not yet been implemented due to the lack of a valid
lexicographic ordering.

To address this, a key encoding strategy was developed for each JSON
value. In order to maintain a lexicographical ordering of the encodings
of JSON values, different marker bytes were defined in an order similar
to the order defined for these JSON values. Encodings for primitive JSON
values, such as Null, False and True, only consist of their marker
bytes.

e.g: To encode a JSON False:

`enc(false::JSONB) = [JSONB_False_Tag]`

Encodings for JSON Strings and Numbers consist of a concatenation of
their respective marker bytes, the encoding of the string or the number
in consideration and a terminator byte to indicate that the encoding for
the JSON value has ended.

e.g: To encode a JSON String '"a"':

`enc('"a"'::JSONB) = [JSONB_String_Tag, enc("a"), JSONB_Terminator_Tag]`

Encodings for JSON Arrays and Objects consist of a concatenation of
their respective marker bytes, the encoded count of elements/key-value
pairs present within the container, the encodings of the elements
present in the container, and finally a terminator tag to indicate
that the encoding for the given JSON container has ended.

e.g: To encode a JSON array '["a"]':

`enc('["a"]'::JSONB) = [JSONB_Array_Tag, enc(1), JSONB_String_Tag,
enc("a"), JSONB_Terminator_Tag, JSONB_Terminator_Tag]`

Epic: CRDB-24501
Fixes: #35706

Release note: None
  • Loading branch information
Shivs11 committed Mar 8, 2023
1 parent 2cc95c9 commit 6f2795a
Show file tree
Hide file tree
Showing 9 changed files with 598 additions and 37 deletions.
16 changes: 2 additions & 14 deletions pkg/sql/colexec/sorttopk.eg.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pkg/sql/rowenc/keyside/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ go_library(
"decode.go",
"doc.go",
"encode.go",
"json.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/rowenc/keyside",
visibility = ["//visibility:public"],
Expand All @@ -19,8 +20,8 @@ go_library(
"//pkg/util/bitarray",
"//pkg/util/duration",
"//pkg/util/encoding",
"//pkg/util/errorutil/unimplemented",
"//pkg/util/ipaddr",
"//pkg/util/json",
"//pkg/util/timetz",
"//pkg/util/timeutil/pgdate",
"//pkg/util/uuid",
Expand Down
9 changes: 5 additions & 4 deletions pkg/sql/rowenc/keyside/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/ipaddr"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/timetz"
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
Expand Down Expand Up @@ -117,13 +118,13 @@ func Decode(
d, err := a.NewDCollatedString(r, valType.Locale())
return d, rkey, err
case types.JsonFamily:
// Don't attempt to decode the JSON value. Instead, just return the
// remaining bytes of the key.
jsonLen, err := encoding.PeekLength(key)
var json json.JSON
json, rkey, err = decodeJSONKey(key, dir)
if err != nil {
return nil, nil, err
}
return tree.DNull, key[jsonLen:], nil
d := a.NewDJSON(tree.DJSON{JSON: json})
return d, rkey, err
case types.BytesFamily:
var r []byte
if dir == encoding.Ascending {
Expand Down
3 changes: 1 addition & 2 deletions pkg/sql/rowenc/keyside/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ package keyside
import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -174,7 +173,7 @@ func Encode(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, error) {
// DEncodedKey carries an already encoded key.
return append(b, []byte(*t)...), nil
case *tree.DJSON:
return nil, unimplemented.NewWithIssue(35706, "unable to encode JSON as a table key")
return encodeJSONKey(b, val.(*tree.DJSON), dir)
}
return nil, errors.Errorf("unable to encode table key: %T", val)
}
195 changes: 195 additions & 0 deletions pkg/sql/rowenc/keyside/json.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package keyside

import (
"github.com/cockroachdb/apd/v3"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/errors"
)

// encodeJSONKey produces the forward-index key encoding of a JSON datum.
// It delegates to EncodeForwardIndex on the wrapped JSON value, passing b
// and dir through, and hands back that call's result and error unchanged.
func encodeJSONKey(b []byte, json *tree.DJSON, dir encoding.Direction) ([]byte, error) {
	encoded, err := json.JSON.EncodeForwardIndex(b, dir)
	return encoded, err
}

// decodeJSONKey decodes one key-encoded JSON value from the front of buf.
//
// It first calls encoding.ValidateAndConsumeJSONKeyMarker to obtain the
// marker type and the remaining bytes, then dispatches on that type:
//   - Null, False and True carry no payload and are built directly.
//   - Strings, Arrays and Objects are handled by their dedicated helpers.
//   - Numbers are decoded as decimals, followed by a single terminator byte.
//
// It returns the decoded JSON value and the bytes that follow it. On any
// failure the returned JSON value and byte slice are both nil.
func decodeJSONKey(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) {
	var err error
	var typ encoding.Type
	var jsonVal json.JSON

	buf, typ, err = encoding.ValidateAndConsumeJSONKeyMarker(buf, dir)
	if err != nil {
		return nil, nil, err
	}

	switch typ {
	case encoding.JSONNull, encoding.JSONNullDesc:
		jsonVal, err = json.MakeJSON(nil)
		if err != nil {
			return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON Null")
		}
	case encoding.JSONFalse, encoding.JSONFalseDesc:
		jsonVal, err = json.MakeJSON(false)
		if err != nil {
			return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON False")
		}
	case encoding.JSONTrue, encoding.JSONTrueDesc:
		jsonVal, err = json.MakeJSON(true)
		if err != nil {
			return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON True")
		}
	case encoding.JSONString, encoding.JSONStringDesc:
		jsonVal, buf, err = decodeJSONString(buf, dir)
		if err != nil {
			return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON String")
		}
	case encoding.JSONNumber:
		var dec apd.Decimal
		buf, dec, err = encoding.DecodeDecimalAscending(buf, nil)
		if err != nil {
			return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Number")
		}
		// A terminator byte must follow the decimal; guard the slice below.
		if len(buf) == 0 {
			return nil, nil, errors.New("cannot find JSON terminator")
		}
		buf = buf[1:] // removing the terminator
		jsonVal = json.FromDecimal(dec)
	case encoding.JSONNumberDesc:
		var dec apd.Decimal
		buf, dec, err = encoding.DecodeDecimalDescending(buf, nil)
		if err != nil {
			return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Number")
		}
		// A terminator byte must follow the decimal; guard the slice below.
		if len(buf) == 0 {
			return nil, nil, errors.New("cannot find JSON terminator")
		}
		buf = buf[1:] // removing the terminator
		jsonVal = json.FromDecimal(dec)
	case encoding.JSONArray, encoding.JSONArrayDesc:
		jsonVal, buf, err = decodeJSONArray(buf, dir)
		if err != nil {
			return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Array")
		}
	case encoding.JSONObject, encoding.JSONObjectDesc:
		jsonVal, buf, err = decodeJSONObject(buf, dir)
		if err != nil {
			return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Object")
		}
	default:
		// Without this branch an unhandled marker type would fall through
		// and return a nil JSON value together with a nil error.
		return nil, nil, errors.AssertionFailedf("unexpected JSON key marker type: %v", typ)
	}

	return jsonVal, buf, nil
}

// decodeJSONString decodes the payload of a key-encoded JSON string. The
// caller has already consumed the string marker from buf.
//
// The payload consists of a length prefix (consumed, value unused), the
// encoded string in the given direction, and one terminator byte. It
// returns the JSON string and the bytes following the terminator.
//
// Malformed input is reported through the error return rather than by
// panicking, so that a corrupt or truncated key cannot crash the process.
func decodeJSONString(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) {
	var err error
	var str string

	// Skip over the length prefix; only the remaining bytes are needed.
	buf, _, err = encoding.DecodeJSONValueLength(buf, dir)
	if err != nil {
		return nil, nil, errors.NewAssertionErrorWithWrappedErrf(
			err, "could not decode the length of the JSON String")
	}

	switch dir {
	case encoding.Ascending:
		buf, str, err = encoding.DecodeUnsafeStringAscendingDeepCopy(buf, nil)
	case encoding.Descending:
		buf, str, err = encoding.DecodeUnsafeStringDescending(buf, nil)
	}
	if err != nil {
		return nil, nil, errors.NewAssertionErrorWithWrappedErrf(
			err, "could not decode the JSON string")
	}

	// The terminator must be present before it can be sliced off.
	if len(buf) == 0 {
		return nil, nil, errors.New("cannot find JSON terminator")
	}
	buf = buf[1:] // removing the terminator

	jsonVal, err := json.MakeJSON(str)
	if err != nil {
		return nil, nil, errors.NewAssertionErrorWithWrappedErrf(
			err, "could not make a JSON String from the input string")
	}
	return jsonVal, buf, nil
}

// decodeJSONArray decodes the payload of a key-encoded JSON array. The
// caller has already consumed the array marker from buf.
//
// The payload starts with the element count, followed by the encodings of
// the elements, and ends with a terminator byte. It returns the built array
// and the bytes following the terminator.
func decodeJSONArray(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) {
	// Extract the total number of elements in the JSON array.
	buf, length, err := encoding.DecodeJSONValueLength(buf, dir)
	if err != nil {
		return nil, nil, errors.NewAssertionErrorWithWrappedErrf(
			err, "could not decode the number of elements in the JSON Array")
	}
	// Pre-allocate the array builder with `length` number
	// of JSON elements.
	jsonArray := json.NewArrayBuilder(int(length))

	var childElem json.JSON
	for {
		if len(buf) == 0 {
			return nil, nil, errors.AssertionFailedf("invalid JSON array encoding (unterminated)")
		}
		// The loop ends on the terminator byte, not on the decoded count.
		if encoding.IsJSONKeyDone(buf, dir) {
			buf = buf[1:]
			return jsonArray.Build(), buf, nil
		}
		childElem, buf, err = decodeJSONKey(buf, dir)
		if err != nil {
			return nil, buf, err
		}
		jsonArray.Add(childElem)
	}
}

// decodeJSONObject decodes the payload of a key-encoded JSON object. The
// caller has already consumed the object marker from buf.
//
// The payload starts with the number of key-value pairs, followed by
// alternating encoded keys and values, and ends with a terminator byte. It
// returns the built object and the bytes following the terminator.
func decodeJSONObject(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) {
	// Extract the total number of key-value pairs in the JSON object.
	buf, length, err := encoding.DecodeJSONValueLength(buf, dir)
	if err != nil {
		return nil, nil, errors.NewAssertionErrorWithWrappedErrf(
			err, "could not decode the number of elements in the JSON Object")
	}

	jsonObject := json.NewObjectBuilder(int(length))
	var jsonKey, value json.JSON
	for {
		if len(buf) == 0 {
			return nil, nil, errors.AssertionFailedf("invalid JSON Object encoding (unterminated)")
		}
		if encoding.IsJSONKeyDone(buf, dir) {
			// JSONB Objects will have a terminator byte.
			buf = buf[1:]
			return jsonObject.Build(), buf, nil
		}

		// Assumption: The byte array given to us can be decoded into a
		// valid JSON Object. In other words, for each JSON key there
		// should be a valid JSON value.
		jsonKey, buf, err = decodeJSONKey(buf, dir)
		if err != nil {
			return nil, buf, err
		}

		key, err := jsonKey.AsText()
		if err != nil {
			return nil, buf, err
		}
		// AsText returns a *string; guard against a nil result so that a
		// malformed key position yields an error instead of a nil
		// dereference below.
		if key == nil {
			return nil, buf, errors.AssertionFailedf("invalid JSON Object encoding (key has no text form)")
		}

		value, buf, err = decodeJSONKey(buf, dir)
		if err != nil {
			return nil, buf, err
		}

		jsonObject.Add(*key, value)
	}
}
2 changes: 1 addition & 1 deletion pkg/sql/rowenc/keyside/keyside_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ func genEncodingDirection() gopter.Gen {
func hasKeyEncoding(typ *types.T) bool {
// Only some types are round-trip key encodable.
switch typ.Family() {
case types.JsonFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily,
case types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily,
types.GeographyFamily, types.GeometryFamily, types.TSVectorFamily, types.TSQueryFamily:
return false
case types.ArrayFamily:
Expand Down
Loading

0 comments on commit 6f2795a

Please sign in to comment.