diff --git a/docs/tech-notes/jsonb_forward_indexing.md b/docs/tech-notes/jsonb_forward_indexing.md index 732ceac1f6e4..f4b46830c8dc 100644 --- a/docs/tech-notes/jsonb_forward_indexing.md +++ b/docs/tech-notes/jsonb_forward_indexing.md @@ -44,10 +44,18 @@ The following rules were kept in mind while designing this form of encoding, as 5. Objects with an equal number of key value pairs are compared in the order: `key1`, `value1`, `key2`, `value2`, …. +**NOTE:** There is one exception to these rules, which is neither documented by +Postgres, nor mentioned in the source code: empty arrays are the minimum JSON +value. As far as we can tell, this is a Postgres bug that has existed for some +time. We've decided to replicate this behavior to remain consistent with +Postgres. We've filed a [Postgres bug report](https://www.postgresql.org/message-id/17873-826fdc8bbcace4f1%40postgresql.org) +to track the issue. + In order to satisfy property 1 at all times, tags are defined in an increasing order of bytes. These tags will also have to be defined in a way where the tag representing an object is a large byte representation for a hexadecimal value (such as 0xff) and the subsequent objects have a value 1 less than the previous one, -where the ordering is described in point 1 above. +where the ordering is described in point 1 above. There is a special tag for empty JSON arrays +in order to handle the special case of empty arrays being ordered before all other JSON values. Additionally, tags representing terminators will also be defined. There will be two terminators, one for the ascending designation and the other for the descending one, and will be required to denote the end of a key encoding of the following JSON values: Objects, Arrays, Number and Strings. JSON Boolean and JSON Null are not required to have the terminator since they do not have variable length encoding due to the presence of a single tag (as explained later in this document). 
diff --git a/pkg/sql/logictest/testdata/logic_test/json_index b/pkg/sql/logictest/testdata/logic_test/json_index index e27bd0e38ff0..2c1c076dd931 100644 --- a/pkg/sql/logictest/testdata/logic_test/json_index +++ b/pkg/sql/logictest/testdata/logic_test/json_index @@ -20,13 +20,13 @@ INSERT INTO t VALUES query T SELECT x FROM t ORDER BY x ---- +[] "a" "aa" "abcdefghi" "b" 1 100 -[] {"a": "b"} @@ -38,13 +38,13 @@ INSERT INTO t VALUES query T SELECT x FROM t@t_pkey ORDER BY x ---- +[] "a" "aa" "abcdefghi" "b" 1 100 -[] {"a": "b"} # Use the index for point lookups. @@ -77,12 +77,12 @@ query T SELECT x FROM t@t_pkey WHERE x > '1' ORDER BY x ---- 100 -[] {"a": "b"} query T SELECT x FROM t@t_pkey WHERE x < '1' ORDER BY x ---- +[] "a" "aa" "abcdefghi" @@ -92,12 +92,12 @@ SELECT x FROM t@t_pkey WHERE x < '1' ORDER BY x query T SELECT x FROM t@t_pkey WHERE x > '1' OR x < '1' ORDER BY x ---- +[] "a" "aa" "abcdefghi" "b" 100 -[] {"a": "b"} query T @@ -109,12 +109,12 @@ query T SELECT x FROM t@t_pkey WHERE x > '1' OR x < '1' ORDER BY x DESC ---- {"a": "b"} -[] 100 "b" "abcdefghi" "aa" "a" +[] # Adding more primitive JSON values. statement ok @@ -129,6 +129,7 @@ INSERT INTO t VALUES query T SELECT x FROM t@t_pkey ORDER BY x ---- +[] null "Testing Punctuation?!." "a" @@ -141,18 +142,17 @@ null 100 false true -[] {"a": "b"} query T SELECT x FROM t@t_pkey WHERE x > 'true' ORDER BY x ---- -[] {"a": "b"} query T SELECT x FROM t@t_pkey WHERE x < 'false' ORDER BY x ---- +[] null "Testing Punctuation?!." "a" @@ -330,12 +330,12 @@ query T SELECT x FROM t@t_pkey ORDER BY x ---- NULL +[] null "crdb" 1 false true -[] [1, 2, 3] {} {"a": "b", "c": "d"} @@ -346,24 +346,24 @@ SELECT x FROM t@t_pkey ORDER BY x DESC {"a": "b", "c": "d"} {} [1, 2, 3] -[] true false 1 "crdb" null +[] NULL # Test to show JSON Null is different from NULL. 
query T SELECT x FROM t@t_pkey WHERE x IS NOT NULL ORDER BY x ---- +[] null "crdb" 1 false true -[] [1, 2, 3] {} {"a": "b", "c": "d"} @@ -446,12 +446,12 @@ INSERT INTO t VALUES query T SELECT x FROM t@i ORDER BY x; ---- +[] null "crdb" 1 false true -[] [null] [1] [{"a": "b"}] diff --git a/pkg/sql/opt/exec/execbuilder/testdata/json b/pkg/sql/opt/exec/execbuilder/testdata/json index 4d0adf966e3c..b03029d5db4f 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/json +++ b/pkg/sql/opt/exec/execbuilder/testdata/json @@ -205,7 +205,7 @@ vectorized: true • scan missing stats table: t@t_pkey - spans: [/'null' - /'null'] [/'""' - /'""'] [/'[]' - /'[]'] [/'{}' - /'{}'] + spans: [/'[]' - /'[]'] [/'null' - /'null'] [/'""' - /'""'] [/'{}' - /'{}'] # Multicolumn index, including JSONB @@ -252,20 +252,20 @@ INSERT INTO composite VALUES (1, '1.00'::JSONB), (2, '1'::JSONB), (3, '2'::JSONB (4, '3.0'::JSONB), (5, '"a"'::JSONB) ---- CPut /Table/108/1/1/0 -> /TUPLE/ -InitPut /Table/108/2/"G*\x02\x00\x00\x89\x88" -> /BYTES/0x2f0f0c200000002000000403348964 +InitPut /Table/108/2/"H*\x02\x00\x00\x89\x88" -> /BYTES/0x2f0f0c200000002000000403348964 CPut /Table/108/1/2/0 -> /TUPLE/ -InitPut /Table/108/2/"G*\x02\x00\x00\x8a\x88" -> /BYTES/ +InitPut /Table/108/2/"H*\x02\x00\x00\x8a\x88" -> /BYTES/ CPut /Table/108/1/3/0 -> /TUPLE/ -InitPut /Table/108/2/"G*\x04\x00\x00\x8b\x88" -> /BYTES/ +InitPut /Table/108/2/"H*\x04\x00\x00\x8b\x88" -> /BYTES/ CPut /Table/108/1/4/0 -> /TUPLE/ -InitPut /Table/108/2/"G*\x06\x00\x00\x8c\x88" -> /BYTES/0x2f0f0c20000000200000040334891e +InitPut /Table/108/2/"H*\x06\x00\x00\x8c\x88" -> /BYTES/0x2f0f0c20000000200000040334891e CPut /Table/108/1/5/0 -> /TUPLE/ -InitPut /Table/108/2/"F\x12a\x00\x01\x00\x8d\x88" -> /BYTES/ +InitPut /Table/108/2/"G\x12a\x00\x01\x00\x8d\x88" -> /BYTES/ query T kvtrace SELECT j FROM composite where j = '1.00'::JSONB ---- -Scan /Table/108/2/"G*\x02\x00\x0{0"-1"} +Scan /Table/108/2/"H*\x02\x00\x0{0"-1"} query T SELECT j FROM composite ORDER BY 
j; diff --git a/pkg/sql/rowenc/keyside/json.go b/pkg/sql/rowenc/keyside/json.go index d98dd74cc4e1..163ed3d43164 100644 --- a/pkg/sql/rowenc/keyside/json.go +++ b/pkg/sql/rowenc/keyside/json.go @@ -79,7 +79,7 @@ func decodeJSONKey(buf []byte, dir encoding.Direction) (json.JSON, []byte, error } buf = buf[1:] // removing the terminator jsonVal = json.FromDecimal(dec) - case encoding.JSONArray, encoding.JSONArrayDesc: + case encoding.JSONArray, encoding.JSONArrayDesc, encoding.JsonEmptyArray, encoding.JsonEmptyArrayDesc: jsonVal, buf, err = decodeJSONArray(buf, dir) if err != nil { return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON Array") diff --git a/pkg/util/encoding/encoding.go b/pkg/util/encoding/encoding.go index 9662e7f301a8..343471503c58 100644 --- a/pkg/util/encoding/encoding.go +++ b/pkg/util/encoding/encoding.go @@ -107,13 +107,18 @@ const ( // Defining different key markers, for the ascending designation, // for handling different JSON values. - jsonNullKeyMarker = voidMarker + 1 - jsonStringKeyMarker = jsonNullKeyMarker + 1 - jsonNumberKeyMarker = jsonStringKeyMarker + 1 - jsonFalseKeyMarker = jsonNumberKeyMarker + 1 - jsonTrueKeyMarker = jsonFalseKeyMarker + 1 - jsonArrayKeyMarker = jsonTrueKeyMarker + 1 - jsonObjectKeyMarker = jsonArrayKeyMarker + 1 + + // Postgres currently has a special case (maybe a bug) where the empty JSON + // Array sorts before all other JSON values. 
See the bug report: + // https://www.postgresql.org/message-id/17873-826fdc8bbcace4f1%40postgresql.org + jsonEmptyArrayKeyMarker = voidMarker + 1 + jsonNullKeyMarker = jsonEmptyArrayKeyMarker + 1 + jsonStringKeyMarker = jsonNullKeyMarker + 1 + jsonNumberKeyMarker = jsonStringKeyMarker + 1 + jsonFalseKeyMarker = jsonNumberKeyMarker + 1 + jsonTrueKeyMarker = jsonFalseKeyMarker + 1 + jsonArrayKeyMarker = jsonTrueKeyMarker + 1 + jsonObjectKeyMarker = jsonArrayKeyMarker + 1 arrayKeyTerminator byte = 0x00 arrayKeyDescendingTerminator byte = 0xFF @@ -127,13 +132,14 @@ const ( // Defining different key markers, for the descending designation, // for handling different JSON values. - jsonNullKeyDescendingMarker = jsonObjectKeyMarker + 7 - jsonStringKeyDescendingMarker = jsonNullKeyDescendingMarker - 1 - jsonNumberKeyDescendingMarker = jsonStringKeyDescendingMarker - 1 - jsonFalseKeyDescendingMarker = jsonNumberKeyDescendingMarker - 1 - jsonTrueKeyDescendingMarker = jsonFalseKeyDescendingMarker - 1 - jsonArrayKeyDescendingMarker = jsonTrueKeyDescendingMarker - 1 - jsonObjectKeyDescendingMarker = jsonArrayKeyDescendingMarker - 1 + jsonEmptyArrayKeyDescendingMarker = jsonObjectKeyMarker + 8 + jsonNullKeyDescendingMarker = jsonEmptyArrayKeyDescendingMarker - 1 + jsonStringKeyDescendingMarker = jsonNullKeyDescendingMarker - 1 + jsonNumberKeyDescendingMarker = jsonStringKeyDescendingMarker - 1 + jsonFalseKeyDescendingMarker = jsonNumberKeyDescendingMarker - 1 + jsonTrueKeyDescendingMarker = jsonFalseKeyDescendingMarker - 1 + jsonArrayKeyDescendingMarker = jsonTrueKeyDescendingMarker - 1 + jsonObjectKeyDescendingMarker = jsonArrayKeyDescendingMarker - 1 // Terminators for JSON Key encoding. jsonKeyTerminator byte = 0x00 @@ -1789,6 +1795,9 @@ const ( JSONArrayDesc Type = 39 JSONObject Type = 40 JSONObjectDesc Type = 41 + // Special case: empty JSON arrays sort before all other JSON values. + JsonEmptyArray Type = 42 + JsonEmptyArrayDesc Type = 43 ) // typMap maps an encoded type byte to a decoded Type. 
It's got 256 slots, one @@ -1849,6 +1858,10 @@ func slowPeekType(b []byte) Type { return JSONArray case m == jsonArrayKeyDescendingMarker: return JSONArrayDesc + case m == jsonEmptyArrayKeyMarker: + return JsonEmptyArray + case m == jsonEmptyArrayKeyDescendingMarker: + return JsonEmptyArrayDesc case m == jsonObjectKeyMarker: return JSONObject case m == jsonObjectKeyDescendingMarker: @@ -2009,10 +2022,12 @@ func PeekLength(b []byte) (int, error) { length, err := getArrayOrJSONLength(b[1:], dir, IsJSONKeyDone) return 1 + length, err case jsonArrayKeyMarker, jsonArrayKeyDescendingMarker, - jsonObjectKeyMarker, jsonObjectKeyDescendingMarker: + jsonObjectKeyMarker, jsonObjectKeyDescendingMarker, + jsonEmptyArrayKeyMarker, jsonEmptyArrayKeyDescendingMarker: dir := Ascending if (m == jsonArrayKeyDescendingMarker) || - (m == jsonObjectKeyDescendingMarker) { + (m == jsonObjectKeyDescendingMarker) || + (m == jsonEmptyArrayKeyDescendingMarker) { dir = Descending } // removing the starter tag @@ -3500,11 +3515,17 @@ func EncodeJSONTrueKeyMarker(buf []byte, dir Direction) []byte { // EncodeJSONArrayKeyMarker adds a JSON Array key encoding marker // to buf and returns the new buffer. 
-func EncodeJSONArrayKeyMarker(buf []byte, dir Direction) []byte { +func EncodeJSONArrayKeyMarker(buf []byte, dir Direction, arrayLength int64) []byte { switch dir { case Ascending: + if arrayLength == 0 { + return append(buf, jsonEmptyArrayKeyMarker) + } return append(buf, jsonArrayKeyMarker) case Descending: + if arrayLength == 0 { + return append(buf, jsonEmptyArrayKeyDescendingMarker) + } return append(buf, jsonArrayKeyDescendingMarker) default: panic("invalid direction") @@ -3621,7 +3642,7 @@ func ValidateAndConsumeJSONKeyMarker(buf []byte, dir Direction) ([]byte, Type, e case Descending: switch typ { case JSONNullDesc, JSONNumberDesc, JSONStringDesc, JSONFalseDesc, - JSONTrueDesc, JSONArrayDesc, JSONObjectDesc: + JSONTrueDesc, JSONArrayDesc, JSONObjectDesc, JsonEmptyArrayDesc: return buf[1:], typ, nil default: return nil, Unknown, errors.Newf("invalid type found %s", typ) @@ -3629,7 +3650,7 @@ func ValidateAndConsumeJSONKeyMarker(buf []byte, dir Direction) ([]byte, Type, e case Ascending: switch typ { case JSONNull, JSONNumber, JSONString, JSONFalse, JSONTrue, JSONArray, - JSONObject: + JSONObject, JsonEmptyArray: return buf[1:], typ, nil default: return nil, Unknown, errors.Newf("invalid type found %s", typ) diff --git a/pkg/util/encoding/type_string.go b/pkg/util/encoding/type_string.go index 4bdb56a300a1..a6eaee2c3d5e 100644 --- a/pkg/util/encoding/type_string.go +++ b/pkg/util/encoding/type_string.go @@ -51,6 +51,8 @@ func _() { _ = x[JSONArrayDesc-39] _ = x[JSONObject-40] _ = x[JSONObjectDesc-41] + _ = x[JsonEmptyArray-42] + _ = x[JsonEmptyArrayDesc-43] } func (i Type) String() string { @@ -139,6 +141,10 @@ func (i Type) String() string { return "JSONObject" case JSONObjectDesc: return "JSONObjectDesc" + case JsonEmptyArray: + return "JsonEmptyArray" + case JsonEmptyArrayDesc: + return "JsonEmptyArrayDesc" default: return "Type(" + strconv.FormatInt(int64(i), 10) + ")" } diff --git a/pkg/util/json/encoded.go b/pkg/util/json/encoded.go index 
24b8d19128b3..0afaf08a093d 100644 --- a/pkg/util/json/encoded.go +++ b/pkg/util/json/encoded.go @@ -606,10 +606,20 @@ func (j *jsonEncoded) AreKeysSorted() bool { return decoded.AreKeysSorted() } -func (j *jsonEncoded) Compare(other JSON) (int, error) { +func (j *jsonEncoded) Compare(other JSON) (_ int, err error) { if other == nil { return -1, nil } + // We must first check for the special case of empty arrays, which are the + // minimum JSON value. + switch { + case isEmptyArray(j) && isEmptyArray(other): + return 0, nil + case isEmptyArray(j): + return -1, nil + case isEmptyArray(other): + return 1, nil + } if cmp := cmpJSONTypes(j.Type(), other.Type()); cmp != 0 { return cmp, nil } diff --git a/pkg/util/json/json.go b/pkg/util/json/json.go index 0327355ebae4..2113c64dfb36 100644 --- a/pkg/util/json/json.go +++ b/pkg/util/json/json.go @@ -579,9 +579,29 @@ func cmpJSONTypes(a Type, b Type) int { return 0 } -func (j jsonNull) Compare(other JSON) (int, error) { return cmpJSONTypes(j.Type(), other.Type()), nil } -func (j jsonFalse) Compare(other JSON) (int, error) { return cmpJSONTypes(j.Type(), other.Type()), nil } -func (j jsonTrue) Compare(other JSON) (int, error) { return cmpJSONTypes(j.Type(), other.Type()), nil } +// isEmptyArray returns true if j is a JSON array with length 0. 
+func isEmptyArray(j JSON) bool { + return j.Type() == ArrayJSONType && j.Len() == 0 +} + +func (j jsonNull) Compare(other JSON) (int, error) { + if isEmptyArray(other) { + return 1, nil + } + return cmpJSONTypes(j.Type(), other.Type()), nil +} +func (j jsonFalse) Compare(other JSON) (int, error) { + if isEmptyArray(other) { + return 1, nil + } + return cmpJSONTypes(j.Type(), other.Type()), nil +} +func (j jsonTrue) Compare(other JSON) (int, error) { + if isEmptyArray(other) { + return 1, nil + } + return cmpJSONTypes(j.Type(), other.Type()), nil +} func decodeIfNeeded(j JSON) (JSON, error) { if enc, ok := j.(*jsonEncoded); ok { @@ -595,6 +615,9 @@ func decodeIfNeeded(j JSON) (JSON, error) { } func (j jsonNumber) Compare(other JSON) (int, error) { + if isEmptyArray(other) { + return 1, nil + } cmp := cmpJSONTypes(j.Type(), other.Type()) if cmp != 0 { return cmp, nil @@ -609,6 +632,9 @@ func (j jsonNumber) Compare(other JSON) (int, error) { } func (j jsonString) Compare(other JSON) (int, error) { + if isEmptyArray(other) { + return 1, nil + } cmp := cmpJSONTypes(j.Type(), other.Type()) if cmp != 0 { return cmp, nil @@ -629,6 +655,14 @@ func (j jsonString) Compare(other JSON) (int, error) { } func (j jsonArray) Compare(other JSON) (int, error) { + switch { + case isEmptyArray(j) && isEmptyArray(other): + return 0, nil + case isEmptyArray(j): + return -1, nil + case isEmptyArray(other): + return 1, nil + } cmp := cmpJSONTypes(j.Type(), other.Type()) if cmp != 0 { return cmp, nil @@ -660,6 +694,8 @@ func (j jsonArray) Compare(other JSON) (int, error) { } func (j jsonObject) Compare(other JSON) (int, error) { + // NOTE: There is no need to check if other is an empty array because all + // arrays are less than all objects, so the type comparison is sufficient. 
cmp := cmpJSONTypes(j.Type(), other.Type()) if cmp != 0 { return cmp, nil @@ -1978,7 +2014,7 @@ func (j jsonTrue) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte } func (j jsonArray) EncodeForwardIndex(buf []byte, dir encoding.Direction) ([]byte, error) { - buf = encoding.EncodeJSONArrayKeyMarker(buf, dir) + buf = encoding.EncodeJSONArrayKeyMarker(buf, dir, int64(len(j))) buf = encoding.EncodeJSONValueLength(buf, dir, int64(len(j))) var err error diff --git a/pkg/util/json/json_test.go b/pkg/util/json/json_test.go index d7f6b59352f3..2e6c5fa1615d 100644 --- a/pkg/util/json/json_test.go +++ b/pkg/util/json/json_test.go @@ -66,6 +66,9 @@ func TestJSONOrdering(t *testing.T) { // We test here that every element in order sorts before every one that comes // after it, and is equal to itself. sources := []string{ + // In Postgres's sorting rules, the empty array comes before everything, + // even null. + `[]`, `null`, `"a"`, `"aa"`, @@ -76,10 +79,8 @@ func TestJSONOrdering(t *testing.T) { `100`, `false`, `true`, - // In Postgres's sorting rules, the empty array comes before everything (even null), - // so this is a departure. - // Shorter arrays sort before longer arrays (this is the same as in Postgres). - `[]`, + // Shorter arrays sort before longer arrays (this is the same as in + // Postgres). `[1]`, `[2]`, `[1, 2]`, @@ -88,8 +89,9 @@ func TestJSONOrdering(t *testing.T) { `{}`, `{"a": 1}`, `{"a": 2}`, - // In Postgres, keys which are shorter sort before keys which are longer. This - // is not true for us (right now). TODO(justin): unclear if it should be. + // In Postgres, keys which are shorter sort before keys which are + // longer. This is not true for us (right now). + // TODO(justin): unclear if it should be. `{"aa": 1}`, `{"b": 1}`, `{"b": 2}`,