From 83e3d4f56084d351a9e06977afb3928a21c63998 Mon Sep 17 00:00:00 2001 From: Shivam Saraf Date: Tue, 21 Feb 2023 13:19:45 -0500 Subject: [PATCH] sql: support JSONB encoding and decoding for forward indexes Currently, it is not possible to create a primary or a secondary index on a JSON column in CRDB. This is because forward indexes on JSONB columns have not yet been implemented due to a lack of a valid lexicographic ordering. To address this, a key encoding strategy was developed for each JSON value. In order to maintain a lexicographical ordering of the encodings of JSON values, different marker bytes were defined in an order similar to the order defined for these JSON values. Encodings for primitive JSON values, such as Null, False and True, only consist of their marker bytes. e.g: To encode a JSON False: `enc(false::JSONB) = [JSONB_False_Tag]` Encodings for JSON Strings and Numbers consist of a concatenation of their respective marker bytes, the encoding of the string or the number in consideration and a terminator byte to indicate that the encoding for the JSON value has ended. e.g: To encode a JSON String '"a"': `enc('"a"'::JSONB) = [JSONB_String_Tag, enc("a"), JSONB_Terminator_Tag]` Encodings for JSON Arrays and Objects consist of a concatenation of their respective marker bytes, the total number of elements/key-value pairs present within the container (in bytes), the encodings of the elements present in the container followed by a terminator tag to indicate the encoding for the given JSON container has ended. 
e.g: To encode a JSON array '["a"]': `enc('["a"]'::JSONB) = [JSONB_Array_Tag, enc(1), JSONB_String_Tag, enc(a), JSONB_Terminator_Tag, JSONB_Terminator_Tag]` Epic: CRDB-24501 Fixes: #35706 Release note: None --- .../catalog/colinfo/column_type_properties.go | 2 +- .../colinfo/column_type_properties_test.go | 2 +- pkg/sql/catalog/tabledesc/index_test.go | 11 +- pkg/sql/colexec/sorttopk.eg.go | 16 +- .../logictest/testdata/logic_test/hash_join | 2 +- pkg/sql/rowenc/index_encoding.go | 2 +- pkg/sql/rowenc/keyside/BUILD.bazel | 3 +- pkg/sql/rowenc/keyside/decode.go | 9 +- pkg/sql/rowenc/keyside/encode.go | 3 +- pkg/sql/rowenc/keyside/json.go | 199 ++++++++++++ pkg/sql/rowenc/keyside/keyside_test.go | 2 +- pkg/sql/sem/tree/datum.go | 33 ++ pkg/util/encoding/encoding.go | 307 ++++++++++++++++-- pkg/util/encoding/type_string.go | 18 +- pkg/util/json/encoded.go | 21 ++ pkg/util/json/json.go | 116 +++++++ 16 files changed, 694 insertions(+), 52 deletions(-) create mode 100644 pkg/sql/rowenc/keyside/json.go diff --git a/pkg/sql/catalog/colinfo/column_type_properties.go b/pkg/sql/catalog/colinfo/column_type_properties.go index 7eeeddbb3cea..96ed30d5a633 100644 --- a/pkg/sql/catalog/colinfo/column_type_properties.go +++ b/pkg/sql/catalog/colinfo/column_type_properties.go @@ -52,6 +52,7 @@ func CanHaveCompositeKeyEncoding(typ *types.T) bool { switch typ.Family() { case types.FloatFamily, types.DecimalFamily, + types.JsonFamily, types.CollatedStringFamily: return true case types.ArrayFamily: @@ -75,7 +76,6 @@ func CanHaveCompositeKeyEncoding(typ *types.T) bool { types.UuidFamily, types.INetFamily, types.TimeFamily, - types.JsonFamily, types.TimeTZFamily, types.BitFamily, types.GeometryFamily, diff --git a/pkg/sql/catalog/colinfo/column_type_properties_test.go b/pkg/sql/catalog/colinfo/column_type_properties_test.go index fb985eaee505..9a1d1a0310df 100644 --- a/pkg/sql/catalog/colinfo/column_type_properties_test.go +++ b/pkg/sql/catalog/colinfo/column_type_properties_test.go @@ 
-52,7 +52,7 @@ func TestCanHaveCompositeKeyEncoding(t *testing.T) { {types.IntArray, false}, {types.Interval, false}, {types.IntervalArray, false}, - {types.Jsonb, false}, + {types.Jsonb, true}, {types.Name, false}, {types.Oid, false}, {types.String, false}, diff --git a/pkg/sql/catalog/tabledesc/index_test.go b/pkg/sql/catalog/tabledesc/index_test.go index 5bfe69d0949f..6e86ed17fc55 100644 --- a/pkg/sql/catalog/tabledesc/index_test.go +++ b/pkg/sql/catalog/tabledesc/index_test.go @@ -252,15 +252,20 @@ func TestIndexInterface(t *testing.T) { errMsgFmt := "Unexpected %s result for index '%s'." // Check index methods on features not tested here. - for _, idx := range indexes { + for pos, idx := range indexes { require.False(t, idx.IsDisabled(), errMsgFmt, "IsDisabled", idx.GetName()) require.False(t, idx.IsCreatedExplicitly(), errMsgFmt, "IsCreatedExplicitly", idx.GetName()) require.False(t, idx.HasOldStoredColumns(), errMsgFmt, "HasOldStoredColumns", idx.GetName()) - require.Equalf(t, 0, idx.NumCompositeColumns(), - errMsgFmt, "NumCompositeColumns", idx.GetName()) + if pos != 2 { + require.Equalf(t, 0, idx.NumCompositeColumns(), + errMsgFmt, "NumCompositeColumns", idx.GetName()) + } else { + require.Equalf(t, 1, idx.NumCompositeColumns(), + errMsgFmt, "NumCompositeColumns", idx.GetName()) + } } // Check particular index features. 
diff --git a/pkg/sql/colexec/sorttopk.eg.go b/pkg/sql/colexec/sorttopk.eg.go index bb9d8503df5a..05e83bfd1085 100644 --- a/pkg/sql/colexec/sorttopk.eg.go +++ b/pkg/sql/colexec/sorttopk.eg.go @@ -408,13 +408,7 @@ func spool_false(t *topKSorter) { } func compareRow_false( - t *topKSorter, - vecIdx1 int, - vecIdx2 int, - rowIdx1 int, - rowIdx2 int, - groupIdx1 int, - groupIdx2 int, + t *topKSorter, vecIdx1 int, vecIdx2 int, rowIdx1 int, rowIdx2 int, groupIdx1 int, groupIdx2 int, ) int { for i := range t.orderingCols { info := t.orderingCols[i] @@ -434,13 +428,7 @@ func compareRow_false( } func compareRow_true( - t *topKSorter, - vecIdx1 int, - vecIdx2 int, - rowIdx1 int, - rowIdx2 int, - groupIdx1 int, - groupIdx2 int, + t *topKSorter, vecIdx1 int, vecIdx2 int, rowIdx1 int, rowIdx2 int, groupIdx1 int, groupIdx2 int, ) int { for i := range t.orderingCols { // TODO(harding): If groupIdx1 != groupIdx2, we may be able to do some diff --git a/pkg/sql/logictest/testdata/logic_test/hash_join b/pkg/sql/logictest/testdata/logic_test/hash_join index 5fe844d9cfb3..534ebf5b08f6 100644 --- a/pkg/sql/logictest/testdata/logic_test/hash_join +++ b/pkg/sql/logictest/testdata/logic_test/hash_join @@ -221,5 +221,5 @@ SELECT * FROM t44797_2 WHERE EXISTS (SELECT * FROM t44797_2 AS l, t44797_3 AS r statement ok CREATE TABLE table57696(col_table TIME NOT NULL) -statement error unable to encode JSON as a table key\nHINT:.*\n.*35706.* +statement ok WITH cte (col_cte) AS ( SELECT * FROM ( VALUES ( ( 'false':::JSONB, '1970-01-05 16:57:40.000665+00:00':::TIMESTAMPTZ ) ) ) EXCEPT ALL SELECT * FROM ( VALUES ( ( ' [ [[true], [], {}, "b", {}], {"a": []}, {"c": 2.05750813403415} ] ':::JSONB, '1970-01-10 05:23:26.000428+00:00':::TIMESTAMPTZ ) ) ) ) SELECT * FROM cte, table57696 diff --git a/pkg/sql/rowenc/index_encoding.go b/pkg/sql/rowenc/index_encoding.go index 222fb1e4aacd..a53373af08d7 100644 --- a/pkg/sql/rowenc/index_encoding.go +++ b/pkg/sql/rowenc/index_encoding.go @@ -1446,7 +1446,7 @@ func 
GetValueColumns(index catalog.Index) []ValueEncodedColumn { id := index.GetCompositeColumnID(i) // Inverted indexes on a composite type (i.e. an array of composite types) // should not add the indexed column to the value. - if index.GetType() == descpb.IndexDescriptor_INVERTED && id == index.GetKeyColumnID(0) { + if index.GetType() == descpb.IndexDescriptor_INVERTED && id == index.InvertedColumnID() { continue } cols = append(cols, ValueEncodedColumn{ColID: id, IsComposite: true}) diff --git a/pkg/sql/rowenc/keyside/BUILD.bazel b/pkg/sql/rowenc/keyside/BUILD.bazel index bdd28f2773ce..37222c44ac4a 100644 --- a/pkg/sql/rowenc/keyside/BUILD.bazel +++ b/pkg/sql/rowenc/keyside/BUILD.bazel @@ -8,6 +8,7 @@ go_library( "decode.go", "doc.go", "encode.go", + "json.go", ], importpath = "github.com/cockroachdb/cockroach/pkg/sql/rowenc/keyside", visibility = ["//visibility:public"], @@ -19,8 +20,8 @@ go_library( "//pkg/util/bitarray", "//pkg/util/duration", "//pkg/util/encoding", - "//pkg/util/errorutil/unimplemented", "//pkg/util/ipaddr", + "//pkg/util/json", "//pkg/util/timetz", "//pkg/util/timeutil/pgdate", "//pkg/util/uuid", diff --git a/pkg/sql/rowenc/keyside/decode.go b/pkg/sql/rowenc/keyside/decode.go index 281303c81eef..fbcfa28d3901 100644 --- a/pkg/sql/rowenc/keyside/decode.go +++ b/pkg/sql/rowenc/keyside/decode.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/duration" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/ipaddr" + "github.com/cockroachdb/cockroach/pkg/util/json" "github.com/cockroachdb/cockroach/pkg/util/timetz" "github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate" "github.com/cockroachdb/cockroach/pkg/util/uuid" @@ -117,13 +118,13 @@ func Decode( d, err := a.NewDCollatedString(r, valType.Locale()) return d, rkey, err case types.JsonFamily: - // Don't attempt to decode the JSON value. Instead, just return the - // remaining bytes of the key. 
- jsonLen, err := encoding.PeekLength(key) + var json json.JSON + json, rkey, err = decodeJSONKey(key, dir) if err != nil { return nil, nil, err } - return tree.DNull, key[jsonLen:], nil + d := a.NewDJSON(tree.DJSON{JSON: json}) + return d, rkey, err case types.BytesFamily: var r []byte if dir == encoding.Ascending { diff --git a/pkg/sql/rowenc/keyside/encode.go b/pkg/sql/rowenc/keyside/encode.go index f424dcf7c3bb..36cdaa6a3aa9 100644 --- a/pkg/sql/rowenc/keyside/encode.go +++ b/pkg/sql/rowenc/keyside/encode.go @@ -13,7 +13,6 @@ package keyside import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/util/encoding" - "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented" "github.com/cockroachdb/errors" ) @@ -174,7 +173,7 @@ func Encode(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, error) { // DEncodedKey carries an already encoded key. return append(b, []byte(*t)...), nil case *tree.DJSON: - return nil, unimplemented.NewWithIssue(35706, "unable to encode JSON as a table key") + return encodeJSONKey(b, val.(*tree.DJSON), dir) } return nil, errors.Errorf("unable to encode table key: %T", val) } diff --git a/pkg/sql/rowenc/keyside/json.go b/pkg/sql/rowenc/keyside/json.go new file mode 100644 index 000000000000..5232781000d5 --- /dev/null +++ b/pkg/sql/rowenc/keyside/json.go @@ -0,0 +1,199 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package keyside + +import ( + "github.com/cockroachdb/apd/v3" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/util/encoding" + "github.com/cockroachdb/cockroach/pkg/util/json" + "github.com/cockroachdb/errors" +) + +// encodeJSONKey is responsible for encoding the different JSON +// values. +func encodeJSONKey(b []byte, json *tree.DJSON, dir encoding.Direction) ([]byte, error) { + return json.JSON.EncodeForwardIndex(b, dir) +} + +// decodeJSONKey is responsible for decoding the different JSON +// values. +func decodeJSONKey(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) { + var err error + var typ encoding.Type + var jsonVal json.JSON + + buf, typ, err = encoding.ValidateAndConsumeJSONKeyMarker(buf, dir) + if err != nil { + return nil, nil, err + } + + switch typ { + case encoding.JSONNull, encoding.JSONNullDesc: + jsonVal, err = json.MakeJSON(nil) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON Null") + } + case encoding.JSONFalse, encoding.JSONFalseDesc: + jsonVal, err = json.MakeJSON(false) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON False") + } + case encoding.JSONTrue, encoding.JSONTrueDesc: + jsonVal, err = json.MakeJSON(true) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON True") + } + case encoding.JSONString, encoding.JSONStringDesc: + jsonVal, buf, err = decodeJSONString(buf, dir) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON String") + } + case encoding.JSONNumber: + var dec apd.Decimal + buf, dec, err = encoding.DecodeDecimalAscending(buf, nil) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Number") + } + if len(buf) == 0 || !encoding.IsJSONKeyDone(buf, dir) { + return nil, nil, errors.New("cannot 
find JSON terminator") + } + buf = buf[1:] // removing the terminator + jsonVal = json.FromDecimal(dec) + case encoding.JSONNumberDesc: + var dec apd.Decimal + buf, dec, err = encoding.DecodeDecimalDescending(buf, nil) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Number") + } + if len(buf) == 0 { + return nil, nil, errors.New("cannot find JSON terminator") + } + if !(encoding.IsJSONKeyDone(buf, dir)) { + return nil, nil, errors.New("invalid JSON terminator") + } + buf = buf[1:] // removing the terminator + jsonVal = json.FromDecimal(dec) + case encoding.JSONArray, encoding.JSONArrayDesc: + jsonVal, buf, err = decodeJSONArray(buf, dir) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Array") + } + case encoding.JSONObject, encoding.JSONObjectDesc: + jsonVal, buf, err = decodeJSONObject(buf, dir) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Object") + } + } + + return jsonVal, buf, nil +} + +func decodeJSONString(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) { + var err error + var str string + + switch dir { + case encoding.Ascending: + buf, str, err = encoding.DecodeUnsafeStringAscendingDeepCopy(buf, nil) + case encoding.Descending: + buf, str, err = encoding.DecodeUnsafeStringDescending(buf, nil) + } + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode"+ + "the JSON String") + } + if len(buf) == 0 { + return nil, nil, errors.New("cannot find JSON terminator") + } + if !(encoding.IsJSONKeyDone(buf, dir)) { + return nil, nil, errors.New("invalid JSON terminator") + } + buf = buf[1:] // removing the terminator + jsonVal, err := json.MakeJSON(str) + if err != nil { + return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, + "could not make a JSON String from the input string") + } + return jsonVal, buf, nil +} + 
+func decodeJSONArray(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) { + // Extracting the total number of elements in the json array. + var err error + buf, length, err := encoding.DecodeJSONValueLength(buf, dir) + if err != nil { + return nil, nil, errors.AssertionFailedf("could not decode the number" + + "of elements in the JSON Array") + } + // Pre-allocate the array builder with `length` number + // of JSON elements. + jsonArray := json.NewArrayBuilder(int(length)) + + var childElem json.JSON + for { + if len(buf) == 0 { + return nil, nil, errors.AssertionFailedf("invalid JSON array encoding (unterminated)") + } + if encoding.IsJSONKeyDone(buf, dir) { + buf = buf[1:] + return jsonArray.Build(), buf, nil + } + childElem, buf, err = decodeJSONKey(buf, dir) + if err != nil { + return nil, buf, err + } + jsonArray.Add(childElem) + } +} + +func decodeJSONObject(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) { + // Extracting the total number of elements in the json object. + var err error + buf, length, err := encoding.DecodeJSONValueLength(buf, dir) + if err != nil { + return nil, nil, errors.AssertionFailedf("could not decode the number" + + "of elements in the JSON Object") + } + + jsonObject := json.NewObjectBuilder(int(length)) + var jsonKey, value json.JSON + for { + if len(buf) == 0 { + return nil, nil, errors.AssertionFailedf("invalid JSON Object encoding (unterminated)") + } + if encoding.IsJSONKeyDone(buf, dir) { + // JSONB Objects will have a terminator byte. + buf = buf[1:] + return jsonObject.Build(), buf, nil + } + + // Assumption: The byte array given to us can be decoded into a + // valid JSON Object. In other words, for each JSON key there + // should be a valid JSON value. 
+ jsonKey, buf, err = decodeJSONKey(buf, dir) + if err != nil { + return nil, buf, err + } + + key, err := jsonKey.AsText() + if err != nil { + return nil, buf, err + } + + value, buf, err = decodeJSONKey(buf, dir) + if err != nil { + return nil, buf, err + } + + jsonObject.Add(*key, value) + } +} diff --git a/pkg/sql/rowenc/keyside/keyside_test.go b/pkg/sql/rowenc/keyside/keyside_test.go index b06e92424dec..e3b35f79b097 100644 --- a/pkg/sql/rowenc/keyside/keyside_test.go +++ b/pkg/sql/rowenc/keyside/keyside_test.go @@ -239,7 +239,7 @@ func genEncodingDirection() gopter.Gen { func hasKeyEncoding(typ *types.T) bool { // Only some types are round-trip key encodable. switch typ.Family() { - case types.JsonFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily, + case types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily, types.GeographyFamily, types.GeometryFamily, types.TSVectorFamily, types.TSQueryFamily: return false case types.ArrayFamily: diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index baf6e2ef2438..99173b9e222f 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -3612,6 +3612,39 @@ func NewDJSON(j json.JSON) *DJSON { return &DJSON{j} } +// DJSON implements the CompositeDatum interface +func (d *DJSON) IsComposite() bool { + switch d.JSON.Type() { + case json.NumberJSONType: + dec, _ := d.JSON.AsDecimal() + DDec := DDecimal{Decimal: *dec} + return DDec.IsComposite() + case json.ArrayJSONType: + jsonArray, _ := d.AsArray() + for _, elem := range jsonArray { + dJsonVal := DJSON{elem} + if dJsonVal.IsComposite() { + return true + } + } + case json.ObjectJSONType: + if it, _ := d.ObjectIter(); it != nil { + // assumption: no collated strings are + // present as JSON keys. Thus, JSON + // keys are not being checked if they + // are composite or not. 
+ for it.Next() { + valDJSON := NewDJSON(it.Value()) + if valDJSON.IsComposite() { + return true + } + } + return false + } + } + return false +} + // ParseDJSON takes a string of JSON and returns a DJSON value. func ParseDJSON(s string) (Datum, error) { j, err := json.ParseJSON(s) diff --git a/pkg/util/encoding/encoding.go b/pkg/util/encoding/encoding.go index a9716372fda1..a9033ea93674 100644 --- a/pkg/util/encoding/encoding.go +++ b/pkg/util/encoding/encoding.go @@ -104,6 +104,16 @@ const ( emptyArray = geoInvertedIndexMarker + 1 voidMarker = emptyArray + 1 + // Defining different key markers, for the ascending designation, + // for handling different JSON values. + jsonNullKeyMarker = voidMarker + 1 + jsonStringKeyMarker = jsonNullKeyMarker + 1 + jsonNumberKeyMarker = jsonStringKeyMarker + 1 + jsonFalseKeyMarker = jsonNumberKeyMarker + 1 + jsonTrueKeyMarker = jsonFalseKeyMarker + 1 + jsonArrayKeyMarker = jsonTrueKeyMarker + 1 + jsonObjectKeyMarker = jsonArrayKeyMarker + 1 + arrayKeyTerminator byte = 0x00 arrayKeyDescendingTerminator byte = 0xFF // We use different null encodings for nulls within key arrays. Doing this @@ -114,6 +124,20 @@ const ( ascendingNullWithinArrayKey byte = 0x01 descendingNullWithinArrayKey byte = 0xFE + // Defining different key markers, for the descending designation, + // for handling different JSON values. + jsonNullKeyDescendingMarker = jsonObjectKeyMarker + 7 + jsonStringKeyDescendingMarker = jsonNullKeyDescendingMarker - 1 + jsonNumberKeyDescendingMarker = jsonStringKeyDescendingMarker - 1 + jsonFalseKeyDescendingMarker = jsonNumberKeyDescendingMarker - 1 + jsonTrueKeyDescendingMarker = jsonFalseKeyDescendingMarker - 1 + jsonArrayKeyDescendingMarker = jsonTrueKeyDescendingMarker - 1 + jsonObjectKeyDescendingMarker = jsonArrayKeyDescendingMarker - 1 + + // Terminators for JSON Key encoding. 
+ jsonKeyTerminator byte = 0x00 + jsonKeyDescendingTerminator byte = 0xFF + // IntMin is chosen such that the range of int tags does not overlap the // ascii character set that is frequently used in testing. IntMin = 0x80 // 128 @@ -1698,20 +1722,34 @@ const ( // value requires more than 4 bits, and thus will be encoded in two bytes. It // is not used as a type value, and thus intentionally overlaps with the // subsequent type value. The 'Type' annotation is intentionally omitted here. - SentinelType = 15 - JSON Type = 15 - Tuple Type = 16 - BitArray Type = 17 - BitArrayDesc Type = 18 // BitArray encoded descendingly - TimeTZ Type = 19 - Geo Type = 20 - GeoDesc Type = 21 - ArrayKeyAsc Type = 22 // Array key encoding - ArrayKeyDesc Type = 23 // Array key encoded descendingly - Box2D Type = 24 - Void Type = 25 - TSQuery Type = 26 - TSVector Type = 27 + SentinelType = 15 + JSON Type = 15 + Tuple Type = 16 + BitArray Type = 17 + BitArrayDesc Type = 18 // BitArray encoded descendingly + TimeTZ Type = 19 + Geo Type = 20 + GeoDesc Type = 21 + ArrayKeyAsc Type = 22 // Array key encoding + ArrayKeyDesc Type = 23 // Array key encoded descendingly + Box2D Type = 24 + Void Type = 25 + TSQuery Type = 26 + TSVector Type = 27 + JSONNull Type = 28 + JSONNullDesc Type = 29 + JSONString Type = 30 + JSONStringDesc Type = 31 + JSONNumber Type = 32 + JSONNumberDesc Type = 33 + JSONFalse Type = 34 + JSONFalseDesc Type = 35 + JSONTrue Type = 36 + JSONTrueDesc Type = 37 + JSONArray Type = 38 + JSONArrayDesc Type = 39 + JSONObject Type = 40 + JSONObjectDesc Type = 41 ) // typMap maps an encoded type byte to a decoded Type. 
It's got 256 slots, one @@ -1748,6 +1786,34 @@ func slowPeekType(b []byte) Type { return ArrayKeyAsc case m == arrayKeyDescendingMarker: return ArrayKeyDesc + case m == jsonNullKeyMarker: + return JSONNull + case m == jsonNullKeyDescendingMarker: + return JSONNullDesc + case m == jsonStringKeyMarker: + return JSONString + case m == jsonStringKeyDescendingMarker: + return JSONStringDesc + case m == jsonNumberKeyMarker: + return JSONNumber + case m == jsonNumberKeyDescendingMarker: + return JSONNumberDesc + case m == jsonFalseKeyMarker: + return JSONFalse + case m == jsonFalseKeyDescendingMarker: + return JSONFalseDesc + case m == jsonTrueKeyMarker: + return JSONTrue + case m == jsonTrueKeyDescendingMarker: + return JSONTrueDesc + case m == jsonArrayKeyMarker: + return JSONArray + case m == jsonArrayKeyDescendingMarker: + return JSONArrayDesc + case m == jsonObjectKeyMarker: + return JSONObject + case m == jsonObjectKeyDescendingMarker: + return JSONObjectDesc case m == bytesMarker: return Bytes case m == bytesDescMarker: @@ -1814,15 +1880,16 @@ func getMultiNonsortingVarintLen(b []byte, num int) (int, error) { return p, nil } -// getArrayLength returns the length of a key encoded array. The input -// must have had the array type marker stripped from the front. -func getArrayLength(buf []byte, dir Direction) (int, error) { +func getArrayOrJSONLength( + buf []byte, dir Direction, f func(buf []byte, dir Direction) bool, +) (int, error) { result := 0 + for { if len(buf) == 0 { - return 0, errors.AssertionFailedf("invalid array encoding (unterminated)") + return 0, errors.AssertionFailedf("invalid encoding (unterminated)") } - if IsArrayKeyDone(buf, dir) { + if f(buf, dir) { // Increment to include the terminator byte. 
result++ break @@ -1871,7 +1938,9 @@ func PeekLength(b []byte) (int, error) { switch m { case encodedNull, encodedNullDesc, encodedNotNull, encodedNotNullDesc, floatNaN, floatNaNDesc, floatZero, decimalZero, byte(True), byte(False), - emptyArray, voidMarker: + emptyArray, voidMarker, jsonNullKeyMarker, jsonNullKeyDescendingMarker, + jsonFalseKeyMarker, jsonFalseKeyDescendingMarker, jsonTrueKeyMarker, + jsonTrueKeyDescendingMarker: // ascendingNullWithinArrayKey and descendingNullWithinArrayKey also // contain the same byte values as encodedNotNull and encodedNotNullDesc // respectively, but they cannot be included explicitly in the case @@ -1891,12 +1960,40 @@ func PeekLength(b []byte) (int, error) { return 1 + n + m + 1, err } return 1 + n + m + 1, nil + case jsonStringKeyMarker, jsonStringKeyDescendingMarker, + jsonNumberKeyMarker, jsonNumberKeyDescendingMarker: + dir := Ascending + if (m == jsonStringKeyDescendingMarker) || + (m == jsonNumberKeyDescendingMarker) { + dir = Descending + } + length, err := getArrayOrJSONLength(b[1:], dir, IsJSONKeyDone) + return 1 + length, err + case jsonArrayKeyMarker, jsonArrayKeyDescendingMarker, + jsonObjectKeyMarker, jsonObjectKeyDescendingMarker: + dir := Ascending + if (m == jsonArrayKeyDescendingMarker) || + (m == jsonObjectKeyDescendingMarker) { + dir = Descending + } + // removing the starter tag + b = b[1:] + + // Getting the number of elements present + // in the container. 
+ numberElems, err := getVarintLen(b) + if err != nil { + return -1, errors.AssertionFailedf("failed to get the number of elements" + + "in the container") + } + length, err := getArrayOrJSONLength(b[numberElems:], dir, IsJSONKeyDone) + return 1 + numberElems + length, err case arrayKeyMarker, arrayKeyDescendingMarker: dir := Ascending if m == arrayKeyDescendingMarker { dir = Descending } - length, err := getArrayLength(b[1:], dir) + length, err := getArrayOrJSONLength(b[1:], dir, IsArrayKeyDone) return 1 + length, err case bytesMarker: return getBytesLength(b, ascendingBytesEscapes) @@ -3297,6 +3394,136 @@ func getGeoInvertedIndexKeyLength(buf []byte) (int, error) { return 1 + cellLen + 1 + floatsLen, nil } +// EncodeJSONNullKeyMarker adds a JSON Null key encoding marker +// to buf and returns the new buffer. +func EncodeJSONNullKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, jsonNullKeyMarker) + case Descending: + return append(buf, jsonNullKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +// EncodeJSONStringKeyMarker adds a JSON String key encoding marker +// to buf and returns the new buffer. +func EncodeJSONStringKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, jsonStringKeyMarker) + case Descending: + return append(buf, jsonStringKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +// EncodeJSONNumberKeyMarker adds a JSON Number key encoding marker +// to buf and returns the new buffer. +func EncodeJSONNumberKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, jsonNumberKeyMarker) + case Descending: + return append(buf, jsonNumberKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +// EncodeJSONFalseKeyMarker adds a JSON False key encoding marker +// to buf and returns the new buffer. 
+func EncodeJSONFalseKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, jsonFalseKeyMarker) + case Descending: + return append(buf, jsonFalseKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +// EncodeJSONTrueKeyMarker adds a JSON True key encoding marker +// to buf and returns the new buffer. +func EncodeJSONTrueKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, jsonTrueKeyMarker) + case Descending: + return append(buf, jsonTrueKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +// EncodeJSONArrayKeyMarker adds a JSON Array key encoding marker +// to buf and returns the new buffer. +func EncodeJSONArrayKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, jsonArrayKeyMarker) + case Descending: + return append(buf, jsonArrayKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +// EncodeJSONKeyTerminator adds a JSON Key terminator +// to buf and returns the buffer. +func EncodeJSONKeyTerminator(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, jsonKeyTerminator) + case Descending: + return append(buf, jsonKeyDescendingTerminator) + default: + panic("invalid direction") + } +} + +// EncodeJSONObjectKeyMarker adds a JSON Object key encoding marker +// to buf and returns the new buffer. 
+func EncodeJSONObjectKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, jsonObjectKeyMarker) + case Descending: + return append(buf, jsonObjectKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +func EncodeJSONValueLength(buf []byte, dir Direction, v int64) []byte { + switch dir { + case Ascending: + return EncodeVarintAscending(buf, v) + case Descending: + return EncodeVarintDescending(buf, v) + default: + panic("invalid direction") + } +} + +func DecodeJSONValueLength(buf []byte, dir Direction) ([]byte, int64, error) { + var v int64 + var err error + switch dir { + case Ascending: + buf, v, err = DecodeVarintAscending(buf) + return buf, v, err + case Descending: + buf, v, err = DecodeVarintDescending(buf) + return buf, v, err + default: + panic("invalid direction") + } +} + // EncodeArrayKeyMarker adds the array key encoding marker to buf and // returns the new buffer. func EncodeArrayKeyMarker(buf []byte, dir Direction) []byte { @@ -3345,6 +3572,34 @@ func IsNextByteArrayEncodedNull(buf []byte, dir Direction) bool { return buf[0] == expected } +// ValidateAndConsumeJSONKeyMarker checks that the marker at the front +// of buf is valid/invalid for a given JSON value for the given direction. +// If the JSON marker is valid, the marker is consumed and the remaining +// bytes in the array are returned. 
+func ValidateAndConsumeJSONKeyMarker(buf []byte, dir Direction) ([]byte, Type, error) { + typ := PeekType(buf) + switch dir { + case Descending: + switch typ { + case JSONNullDesc, JSONNumberDesc, JSONStringDesc, JSONFalseDesc, + JSONTrueDesc, JSONArrayDesc, JSONObjectDesc: + return buf[1:], typ, nil + default: + return nil, Unknown, errors.Newf("invalid type found %s", typ) + } + case Ascending: + switch typ { + case JSONNull, JSONNumber, JSONString, JSONFalse, JSONTrue, JSONArray, + JSONObject: + return buf[1:], typ, nil + default: + return nil, Unknown, errors.Newf("invalid type found %s", typ) + } + default: + return nil, Unknown, errors.Newf("invalid direction %s", typ) + } +} + // ValidateAndConsumeArrayKeyMarker checks that the marker at the front // of buf is valid for an array of the given direction, and consumes it // if so. It returns an error if the tag is invalid. @@ -3370,6 +3625,16 @@ func IsArrayKeyDone(buf []byte, dir Direction) bool { return buf[0] == expected } +// isJSONKeyDone returns if the first byte in the input is the JSON +// terminator for the input direction. +func IsJSONKeyDone(buf []byte, dir Direction) bool { + expected := jsonKeyTerminator + if dir == Descending { + expected = jsonKeyDescendingTerminator + } + return buf[0] == expected +} + // BytesNext returns the next possible byte slice, using the extra capacity // of the provided slice if possible, and if not, appending an \x00. 
func BytesNext(b []byte) []byte { diff --git a/pkg/util/encoding/type_string.go b/pkg/util/encoding/type_string.go index befbb9dc9b7a..d62dae712a22 100644 --- a/pkg/util/encoding/type_string.go +++ b/pkg/util/encoding/type_string.go @@ -36,11 +36,25 @@ func _() { _ = x[Void-25] _ = x[TSQuery-26] _ = x[TSVector-27] + _ = x[JSONNull-28] + _ = x[JSONNullDesc-29] + _ = x[JSONString-30] + _ = x[JSONStringDesc-31] + _ = x[JSONNumber-32] + _ = x[JSONNumberDesc-33] + _ = x[JSONFalse-34] + _ = x[JSONFalseDesc-35] + _ = x[JSONTrue-36] + _ = x[JSONTrueDesc-37] + _ = x[JSONArray-38] + _ = x[JSONArrayDesc-39] + _ = x[JSONObject-40] + _ = x[JSONObjectDesc-41] } -const _Type_name = "UnknownNullNotNullIntFloatDecimalBytesBytesDescTimeDurationTrueFalseUUIDArrayIPAddrJSONTupleBitArrayBitArrayDescTimeTZGeoGeoDescArrayKeyAscArrayKeyDescBox2DVoidTSQueryTSVector" +const _Type_name = "UnknownNullNotNullIntFloatDecimalBytesBytesDescTimeDurationTrueFalseUUIDArrayIPAddrJSONTupleBitArrayBitArrayDescTimeTZGeoGeoDescArrayKeyAscArrayKeyDescBox2DVoidTSQueryTSVectorJSONNullJSONNullDescJSONStringJSONStringDescJSONNumberJSONNumberDescJSONFalseJSONFalseDescJSONTrueJSONTrueDescJSONArrayJSONArrayDescJSONObjectJSONObjectDesc" -var _Type_index = [...]uint8{0, 7, 11, 18, 21, 26, 33, 38, 47, 51, 59, 63, 68, 72, 77, 83, 87, 92, 100, 112, 118, 121, 128, 139, 151, 156, 160, 167, 175} +var _Type_index = [...]uint16{0, 7, 11, 18, 21, 26, 33, 38, 47, 51, 59, 63, 68, 72, 77, 83, 87, 92, 100, 112, 118, 121, 128, 139, 151, 156, 160, 167, 175, 183, 195, 205, 219, 229, 243, 252, 265, 273, 285, 294, 307, 317, 331} func (i Type) String() string { if i < 0 || i >= Type(len(_Type_index)-1) { diff --git a/pkg/util/json/encoded.go b/pkg/util/json/encoded.go index c4e04dd5156e..6b0e50e4bc94 100644 --- a/pkg/util/json/encoded.go +++ b/pkg/util/json/encoded.go @@ -19,6 +19,7 @@ import ( "github.com/cockroachdb/apd/v3" "github.com/cockroachdb/cockroach/pkg/sql/inverted" + "github.com/cockroachdb/cockroach/pkg/util/encoding" 
 	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
 	"github.com/cockroachdb/errors"
 )
@@ -583,6 +584,17 @@ func (j *jsonEncoded) AsBool() (bool, bool) {
 	return decoded.AsBool()
 }
 
+func (j *jsonEncoded) AsArray() ([]JSON, bool) {
+	if dec := j.alreadyDecoded(); dec != nil {
+		return dec.AsArray()
+	}
+	decoded, err := j.decode()
+	if err != nil {
+		return nil, false
+	}
+	return decoded.AsArray()
+}
+
 func (j *jsonEncoded) Compare(other JSON) (int, error) {
 	if other == nil {
 		return -1, nil
@@ -739,6 +751,15 @@ func (j *jsonEncoded) Len() int {
 	return j.containerLen
 }
 
+// EncodeForwardIndex implements the JSON interface.
+func (j *jsonEncoded) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) {
+	decoded, err := j.decode()
+	if err != nil {
+		return nil, err
+	}
+	return decoded.EncodeForwardIndex(buf, dir)
+}
+
 // EncodeInvertedIndexKeys implements the JSON interface.
 func (j *jsonEncoded) encodeInvertedIndexKeys(b []byte) ([][]byte, error) {
 	// TODO(justin): this could possibly be optimized.
diff --git a/pkg/util/json/json.go b/pkg/util/json/json.go
index d2ba389e32df..171ea07e865d 100644
--- a/pkg/util/json/json.go
+++ b/pkg/util/json/json.go
@@ -95,6 +95,11 @@ type JSON interface {
 	// Size returns the size of the JSON document in bytes.
 	Size() uintptr
 
+	// EncodeForwardIndex appends the forward index key encoding of the
+	// JSON value to `buf` and returns the resulting buffer. The encoding
+	// produced depends on the direction specified by `dir`.
+	EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error)
+
 	// encodeInvertedIndexKeys takes in a key prefix and returns a slice of
 	// inverted index keys, one per path through the receiver.
 	encodeInvertedIndexKeys(b []byte) ([][]byte, error)
@@ -182,6 +187,10 @@ type JSON interface {
 	// and a boolean indicating if this JSON value is a bool type.
 	AsBool() (bool, bool)
 
+	// AsArray returns the JSON document as an Array if it is an array type,
+	// and a boolean indicating if this JSON value is an array type.
+	AsArray() ([]JSON, bool)
+
 	// Exists implements the `?` operator: does the string exist as a top-level
 	// key within the JSON value?
 	//
@@ -848,6 +857,34 @@ func (j jsonObject) Size() uintptr {
 	return valSize
 }
 
+func (j jsonNull) AsArray() ([]JSON, bool) {
+	return nil, false
+}
+
+func (j jsonString) AsArray() ([]JSON, bool) {
+	return nil, false
+}
+
+func (j jsonFalse) AsArray() ([]JSON, bool) {
+	return nil, false
+}
+
+func (j jsonTrue) AsArray() ([]JSON, bool) {
+	return nil, false
+}
+
+func (j jsonObject) AsArray() ([]JSON, bool) {
+	return nil, false
+}
+
+func (j jsonArray) AsArray() ([]JSON, bool) {
+	return j, true
+}
+
+func (j jsonNumber) AsArray() ([]JSON, bool) {
+	return nil, false
+}
+
 // parseJSONGoStd parses json using encoding/json library.
 // TODO(yevgeniy): Remove this code once we get more confidence in lexer implementation.
func parseJSONGoStd(s string, _ parseConfig) (JSON, error) { @@ -1857,6 +1894,85 @@ func (j jsonObject) FetchValKey(key string) (JSON, error) { return nil, nil } +func (j jsonNull) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { + buf = encoding.EncodeJSONNullKeyMarker(buf, dir) + return buf, nil +} + +func (j jsonString) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { + buf = encoding.EncodeJSONStringKeyMarker(buf, dir) + + switch dir { + case encoding.Ascending: + buf = encoding.EncodeStringAscending(buf, string(j)) + case encoding.Descending: + buf = encoding.EncodeStringDescending(buf, string(j)) + default: + return nil, errors.AssertionFailedf("invalid direction") + } + buf = encoding.EncodeJSONKeyTerminator(buf, dir) + return buf, nil +} + +func (j jsonNumber) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { + buf = encoding.EncodeJSONNumberKeyMarker(buf, dir) + var dec = apd.Decimal(j) + switch dir { + case encoding.Ascending: + buf = encoding.EncodeDecimalAscending(buf, &dec) + case encoding.Descending: + buf = encoding.EncodeDecimalDescending(buf, &dec) + default: + return nil, errors.AssertionFailedf("invalid direction") + } + buf = encoding.EncodeJSONKeyTerminator(buf, dir) + return buf, nil +} + +func (j jsonFalse) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { + buf = encoding.EncodeJSONFalseKeyMarker(buf, dir) + return buf, nil +} + +func (j jsonTrue) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { + buf = encoding.EncodeJSONTrueKeyMarker(buf, dir) + return buf, nil +} + +func (j jsonArray) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { + buf = encoding.EncodeJSONArrayKeyMarker(buf, dir) + buf = encoding.EncodeJSONValueLength(buf, dir, int64(len(j))) + + var err error + for _, a := range j { + buf, err = a.EncodeForwardIndex(buf, dir) + if err != nil { + return nil, err + } + } + buf = 
encoding.EncodeJSONKeyTerminator(buf, dir) + return buf, nil +} + +func (j jsonObject) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { + buf = encoding.EncodeJSONObjectKeyMarker(buf, dir) + buf = encoding.EncodeJSONValueLength(buf, dir, int64(len(j))) + + var err error + for _, a := range j { + buf, err = a.k.EncodeForwardIndex(buf, dir) + if err != nil { + return nil, err + } + buf, err = a.v.EncodeForwardIndex(buf, dir) + if err != nil { + return nil, err + } + } + buf = encoding.EncodeJSONKeyTerminator(buf, dir) + return buf, nil +} + func (jsonNull) FetchValKey(string) (JSON, error) { return nil, nil } func (jsonTrue) FetchValKey(string) (JSON, error) { return nil, nil } func (jsonFalse) FetchValKey(string) (JSON, error) { return nil, nil }