-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sql: support JSONB encoding and decoding for forward indexes
Currently, it is not possible to create a primary and a secondary index on a JSON column in CRDB. This is because forward indexes on JSONB columns have not yet been implemented due to a lack of a valid lexicographic ordering. To address this, a key encoding strategy was developed for each JSON value. In order to maintain a lexicographical ordering of the encodings of JSON values, different marker bytes were defined in an order similar to the order defined for these JSON values. Encodings for primitive JSON values, such as Null, False and True, only consist of their marker bytes. e.g: To encode a JSON False: `enc(false::JSONB) = [JSONB_False_Tag]` Encodings for JSON Strings and Numbers consist of a concatenation of their respective marker bytes, the encoding of the string or the number in consideration and a terminator byte to indicate that the encoding for the JSON value has ended. e.g: To encode a JSON String '"a"': `enc('"a"'::JSONB) = [JSONB_String_Tag, enc("a"), JSONB_Terminator_Tag]` Encodings for JSON Arrays and Objects consist of a concatenation of their respective marker bytes, the total number of elements/key-value pairs present within the container (in bytes), the encodings of the elements present in the container followed by a terminator tag to indicate the encoding for the given JSON container has ended. e.g: To encode a JSON array '["a"]': `enc('["a"]'::JSONB) = [JSONB_Array_Tag, enc(1), JSONB_String_Tag, enc("a"), JSONB_Terminator_Tag, JSONB_Terminator_Tag]` Epic: CRDB-24501 Fixes: #35706 Release note: None
- Loading branch information
Showing
9 changed files
with
598 additions
and
37 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
// Copyright 2023 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package keyside | ||
|
||
import ( | ||
"github.com/cockroachdb/apd/v3" | ||
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree" | ||
"github.com/cockroachdb/cockroach/pkg/util/encoding" | ||
"github.com/cockroachdb/cockroach/pkg/util/json" | ||
"github.com/cockroachdb/errors" | ||
) | ||
|
||
// encodeJSONKey is responsible for encoding the different JSON | ||
// values. | ||
func encodeJSONKey(b []byte, json *tree.DJSON, dir encoding.Direction) ([]byte, error) { | ||
return json.JSON.EncodeForwardIndex(b, dir) | ||
} | ||
|
||
// decodeJSONKey is responsible for decoding the different JSON | ||
// values. | ||
func decodeJSONKey(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) { | ||
var err error | ||
var typ encoding.Type | ||
var jsonVal json.JSON | ||
|
||
buf, typ, err = encoding.ValidateAndConsumeJSONKeyMarker(buf, dir) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
||
switch typ { | ||
case encoding.JSONNull, encoding.JSONNullDesc: | ||
jsonVal, err = json.MakeJSON(nil) | ||
if err != nil { | ||
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON Null") | ||
} | ||
case encoding.JSONFalse, encoding.JSONFalseDesc: | ||
jsonVal, err = json.MakeJSON(false) | ||
if err != nil { | ||
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON False") | ||
} | ||
case encoding.JSONTrue, encoding.JSONTrueDesc: | ||
jsonVal, err = json.MakeJSON(true) | ||
if err != nil { | ||
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON True") | ||
} | ||
case encoding.JSONString, encoding.JSONStringDesc: | ||
jsonVal, buf, err = decodeJSONString(buf, dir) | ||
if err != nil { | ||
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON String") | ||
} | ||
case encoding.JSONNumber: | ||
var dec apd.Decimal | ||
buf, dec, err = encoding.DecodeDecimalAscending(buf, nil) | ||
if err != nil { | ||
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Number") | ||
} | ||
if len(buf) == 0 { | ||
return nil, nil, errors.New("cannot find JSON terminator") | ||
} | ||
buf = buf[1:] // removing the terminator | ||
jsonVal = json.FromDecimal(dec) | ||
case encoding.JSONNumberDesc: | ||
var dec apd.Decimal | ||
buf, dec, err = encoding.DecodeDecimalDescending(buf, nil) | ||
if err != nil { | ||
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Number") | ||
} | ||
if len(buf) == 0 { | ||
return nil, nil, errors.New("cannot find JSON terminator") | ||
} | ||
buf = buf[1:] // removing the terminator | ||
jsonVal = json.FromDecimal(dec) | ||
case encoding.JSONArray, encoding.JSONArrayDesc: | ||
jsonVal, buf, err = decodeJSONArray(buf, dir) | ||
if err != nil { | ||
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Array") | ||
} | ||
case encoding.JSONObject, encoding.JSONObjectDesc: | ||
jsonVal, buf, err = decodeJSONObject(buf, dir) | ||
if err != nil { | ||
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode the JSON Object") | ||
} | ||
} | ||
|
||
return jsonVal, buf, nil | ||
} | ||
|
||
func decodeJSONString(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) { | ||
// extracting the total number of elements in the byte array | ||
var err error | ||
var str string | ||
buf, _, err = encoding.DecodeJSONValueLength(buf, dir) | ||
|
||
if err != nil { | ||
panic("could not decode the length of the JSON String") | ||
} | ||
|
||
switch dir { | ||
case encoding.Ascending: | ||
buf, str, err = encoding.DecodeUnsafeStringAscendingDeepCopy(buf, nil) | ||
case encoding.Descending: | ||
buf, str, err = encoding.DecodeUnsafeStringDescending(buf, nil) | ||
} | ||
if err != nil { | ||
panic("could not decode the JSON string") | ||
} | ||
buf = buf[1:] // removing the terminator | ||
jsonVal, err := json.MakeJSON(str) | ||
if err != nil { | ||
panic("could not make a JSON String from the input string") | ||
} | ||
return jsonVal, buf, nil | ||
} | ||
|
||
func decodeJSONArray(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) { | ||
// extracting the total number of elements in the json array | ||
var err error | ||
buf, length, err := encoding.DecodeJSONValueLength(buf, dir) | ||
if err != nil { | ||
return nil, nil, errors.AssertionFailedf("could not decode the number" + | ||
"of elements in the JSON Array") | ||
} | ||
// Pre-allocate the array builder with `length` number | ||
// of JSON elements. | ||
jsonArray := json.NewArrayBuilder(int(length)) | ||
|
||
var childElem json.JSON | ||
for { | ||
if len(buf) == 0 { | ||
return nil, nil, errors.AssertionFailedf("invalid JSON array encoding (unterminated)") | ||
} | ||
if encoding.IsJSONKeyDone(buf, dir) { | ||
buf = buf[1:] | ||
return jsonArray.Build(), buf, nil | ||
} | ||
childElem, buf, err = decodeJSONKey(buf, dir) | ||
if err != nil { | ||
return nil, buf, err | ||
} | ||
jsonArray.Add(childElem) | ||
} | ||
} | ||
|
||
func decodeJSONObject(buf []byte, dir encoding.Direction) (json.JSON, []byte, error) { | ||
// extracting the total number of elements in the json object | ||
var err error | ||
buf, length, err := encoding.DecodeJSONValueLength(buf, dir) | ||
if err != nil { | ||
return nil, nil, errors.AssertionFailedf("could not decode the number" + | ||
"of elements in the JSON Object") | ||
} | ||
|
||
jsonObject := json.NewObjectBuilder(int(length)) | ||
var jsonKey, value json.JSON | ||
for { | ||
if len(buf) == 0 { | ||
return nil, nil, errors.AssertionFailedf("invalid JSON Object encoding (unterminated)") | ||
} | ||
if encoding.IsJSONKeyDone(buf, dir) { | ||
// JSONB Objects will have a terminator byte. | ||
buf = buf[1:] | ||
return jsonObject.Build(), buf, nil | ||
} | ||
|
||
// Assumption: The byte array given to us can be decoded into a | ||
// valid JSON Object. In other words, for each JSON key there | ||
// should be a valid JSON value. | ||
jsonKey, buf, err = decodeJSONKey(buf, dir) | ||
|
||
if err != nil { | ||
return nil, buf, err | ||
} | ||
|
||
key, err := jsonKey.AsText() | ||
if err != nil { | ||
return nil, buf, err | ||
} | ||
|
||
value, buf, err = decodeJSONKey(buf, dir) | ||
if err != nil { | ||
return nil, buf, err | ||
} | ||
|
||
jsonObject.Add(*key, value) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.