From d4a398a16fd1f9850438d679051350f6f80129a2 Mon Sep 17 00:00:00 2001 From: Shivam Saraf Date: Thu, 30 Mar 2023 10:22:24 -0400 Subject: [PATCH] sql: replicating JSON empty array ordering found in Postgres Currently, #97928 and #99275 are responsible for laying out a lexicographical ordering for JSON columns to be forward indexable in nature. This ordering is based on the rules posted by Postgres and is defined here: #99849 However, Postgres currently sorts the empty JSON array before any other JSON values. An issue has been opened: https://www.postgresql.org/message-id/17873-826fdc8bbcace4f1%40postgresql.org Thus, this PR intends on replicating this behaviour until the issue has been identified and resolved by Postgres. Epic: CRDB-24501 Release note (sql change): This PR shall now place the empty JSON array to have the lowest (highest) precedence when the JSON column is being sorted in ascending (descending) order. --- pkg/sql/logictest/testdata/logic_test/json | 4 +- pkg/sql/rowenc/keyside/json.go | 2 +- pkg/util/encoding/encoding.go | 61 +++++++++++++++------- pkg/util/json/json.go | 2 +- 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/json b/pkg/sql/logictest/testdata/logic_test/json index 3d313dd0ef6a..929318f96a55 100644 --- a/pkg/sql/logictest/testdata/logic_test/json +++ b/pkg/sql/logictest/testdata/logic_test/json @@ -1382,12 +1382,12 @@ INSERT INTO t VALUES query T SELECT x FROM t ORDER BY x; ---- +[] null "crdb" 1 false true -[] [null] [1] [{"a": "b"}] @@ -1430,12 +1430,12 @@ SELECT t1.x FROM t1 INNER LOOKUP JOIN t2 ON t1.x = t2.x query T SELECT x FROM t GROUP BY x ORDER BY x; ---- +[] null "crdb" 1 false true -[] [null] [1] [{"a": "b"}] diff --git a/pkg/sql/rowenc/keyside/json.go b/pkg/sql/rowenc/keyside/json.go index d98dd74cc4e1..163ed3d43164 100644 --- a/pkg/sql/rowenc/keyside/json.go +++ b/pkg/sql/rowenc/keyside/json.go @@ -79,7 +79,7 @@ func decodeJSONKey(buf []byte, dir encoding.Direction) (json.JSON, []byte, error } buf = buf[1:] // removing the terminator jsonVal = json.FromDecimal(dec) - case encoding.JSONArray, encoding.JSONArrayDesc: + case encoding.JSONArray, encoding.JSONArrayDesc, encoding.JsonEmptyArray, encoding.JsonEmptyArrayDesc: jsonVal, buf, err = decodeJSONArray(buf, dir) if err != nil { return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON Array") diff --git a/pkg/util/encoding/encoding.go b/pkg/util/encoding/encoding.go index 22f573d86edf..0a420f7baa65 100644 --- a/pkg/util/encoding/encoding.go +++ b/pkg/util/encoding/encoding.go @@ -106,13 +106,17 @@ const ( // Defining different key markers, for the ascending designation, // for handling different JSON values. - jsonNullKeyMarker = voidMarker + 1 - jsonStringKeyMarker = jsonNullKeyMarker + 1 - jsonNumberKeyMarker = jsonStringKeyMarker + 1 - jsonFalseKeyMarker = jsonNumberKeyMarker + 1 - jsonTrueKeyMarker = jsonFalseKeyMarker + 1 - jsonArrayKeyMarker = jsonTrueKeyMarker + 1 - jsonObjectKeyMarker = jsonArrayKeyMarker + 1 + + // Postgres currently has a special case (maybe a bug) where + // the empty JSON Array sorts before all other JSON values. + jsonEmptyArrayKeyMarker = voidMarker + 1 + jsonNullKeyMarker = jsonEmptyArrayKeyMarker + 1 + jsonStringKeyMarker = jsonNullKeyMarker + 1 + jsonNumberKeyMarker = jsonStringKeyMarker + 1 + jsonFalseKeyMarker = jsonNumberKeyMarker + 1 + jsonTrueKeyMarker = jsonFalseKeyMarker + 1 + jsonArrayKeyMarker = jsonTrueKeyMarker + 1 + jsonObjectKeyMarker = jsonArrayKeyMarker + 1 arrayKeyTerminator byte = 0x00 arrayKeyDescendingTerminator byte = 0xFF @@ -126,13 +130,14 @@ const ( // Defining different key markers, for the descending designation, // for handling different JSON values. - jsonNullKeyDescendingMarker = jsonObjectKeyMarker + 7 - jsonStringKeyDescendingMarker = jsonNullKeyDescendingMarker - 1 - jsonNumberKeyDescendingMarker = jsonStringKeyDescendingMarker - 1 - jsonFalseKeyDescendingMarker = jsonNumberKeyDescendingMarker - 1 - jsonTrueKeyDescendingMarker = jsonFalseKeyDescendingMarker - 1 - jsonArrayKeyDescendingMarker = jsonTrueKeyDescendingMarker - 1 - jsonObjectKeyDescendingMarker = jsonArrayKeyDescendingMarker - 1 + jsonEmptyArrayKeyDescendingMarker = jsonObjectKeyMarker + 8 + jsonNullKeyDescendingMarker = jsonEmptyArrayKeyDescendingMarker - 1 + jsonStringKeyDescendingMarker = jsonNullKeyDescendingMarker - 1 + jsonNumberKeyDescendingMarker = jsonStringKeyDescendingMarker - 1 + jsonFalseKeyDescendingMarker = jsonNumberKeyDescendingMarker - 1 + jsonTrueKeyDescendingMarker = jsonFalseKeyDescendingMarker - 1 + jsonArrayKeyDescendingMarker = jsonTrueKeyDescendingMarker - 1 + jsonObjectKeyDescendingMarker = jsonArrayKeyDescendingMarker - 1 // Terminators for JSON Key encoding. jsonKeyTerminator byte = 0x00 @@ -1750,6 +1755,9 @@ const ( JSONArrayDesc Type = 39 JSONObject Type = 40 JSONObjectDesc Type = 41 + // Special case + JsonEmptyArray Type = 42 + JsonEmptyArrayDesc Type = 43 ) // typMap maps an encoded type byte to a decoded Type. It's got 256 slots, one @@ -1810,6 +1818,10 @@ func slowPeekType(b []byte) Type { return JSONArray case m == jsonArrayKeyDescendingMarker: return JSONArrayDesc + case m == jsonEmptyArrayKeyMarker: + return JsonEmptyArray + case m == jsonEmptyArrayKeyDescendingMarker: + return JsonEmptyArrayDesc case m == jsonObjectKeyMarker: return JSONObject case m == jsonObjectKeyDescendingMarker: @@ -1970,7 +1982,8 @@ func PeekLength(b []byte) (int, error) { length, err := getArrayOrJSONLength(b[1:], dir, IsJSONKeyDone) return 1 + length, err case jsonArrayKeyMarker, jsonArrayKeyDescendingMarker, - jsonObjectKeyMarker, jsonObjectKeyDescendingMarker: + jsonObjectKeyMarker, jsonObjectKeyDescendingMarker, + jsonEmptyArrayKeyMarker, jsonEmptyArrayKeyDescendingMarker: dir := Ascending if (m == jsonArrayKeyDescendingMarker) || (m == jsonObjectKeyDescendingMarker) { @@ -3461,12 +3474,20 @@ func EncodeJSONTrueKeyMarker(buf []byte, dir Direction) []byte { // EncodeJSONArrayKeyMarker adds a JSON Array key encoding marker // to buf and returns the new buffer. -func EncodeJSONArrayKeyMarker(buf []byte, dir Direction) []byte { +func EncodeJSONArrayKeyMarker(buf []byte, dir Direction, length int64) []byte { switch dir { case Ascending: - return append(buf, jsonArrayKeyMarker) + if length == 0 { + return append(buf, jsonEmptyArrayKeyMarker) + } else { + return append(buf, jsonArrayKeyMarker) + } case Descending: - return append(buf, jsonArrayKeyDescendingMarker) + if length == 0 { + return append(buf, jsonEmptyArrayKeyDescendingMarker) + } else { + return append(buf, jsonArrayKeyDescendingMarker) + } default: panic("invalid direction") } @@ -3582,7 +3603,7 @@ func ValidateAndConsumeJSONKeyMarker(buf []byte, dir Direction) ([]byte, Type, e case Descending: switch typ { case JSONNullDesc, JSONNumberDesc, JSONStringDesc, JSONFalseDesc, - JSONTrueDesc, JSONArrayDesc, JSONObjectDesc: + JSONTrueDesc, JSONArrayDesc, JSONObjectDesc, JsonEmptyArrayDesc: return buf[1:], typ, nil default: return nil, Unknown, errors.Newf("invalid type found %s", typ) @@ -3590,7 +3611,7 @@ func ValidateAndConsumeJSONKeyMarker(buf []byte, dir Direction) ([]byte, Type, e case Ascending: switch typ { case JSONNull, JSONNumber, JSONString, JSONFalse, JSONTrue, JSONArray, - JSONObject: + JSONObject, JsonEmptyArray: return buf[1:], typ, nil default: return nil, Unknown, errors.Newf("invalid type found %s", typ) diff --git a/pkg/util/json/json.go b/pkg/util/json/json.go index 0327355ebae4..03e0fc65a77b 100644 --- a/pkg/util/json/json.go +++ b/pkg/util/json/json.go @@ -1978,7 +1978,7 @@ func (j jsonTrue) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte } func (j jsonArray) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { - buf = encoding.EncodeJSONArrayKeyMarker(buf, dir) + buf = encoding.EncodeJSONArrayKeyMarker(buf, dir, int64(len(j))) buf = encoding.EncodeJSONValueLength(buf, dir, int64(len(j))) var err error