Skip to content

Commit

Permalink
sql: enabling forward indexes and ORDER BY on JSONB columns
Browse files Browse the repository at this point in the history
Currently, #97928 outlines the scheme for JSONB encoding
and decoding for forward indexes. However, that PR doesn't
enable this feature for our users. This PR aims
to allow forward indexes on JSONB columns. The presence
of a lexicographical ordering, as described in #97928,
now allows primary and secondary indexes on JSONB
columns, along with the ability to use ORDER BY on JSONB
columns in queries.

Additionally, JSON values can be decimal numbers or
containers, such as Arrays and Objects, which can
contain these decimal numbers. In order to preserve
the digits after the decimal point, JSONB columns are now
required to be composite in nature. This allows
such values to be stored in both the key and the value
side of a K/V pair so that the exact value can be retrieved.

Fixes: #35706

Release note (sql change): This PR adds support for enabling
forward indexes and ordering on JSON values.

Epic: CRDB-24501
  • Loading branch information
Shivs11 committed Apr 11, 2023
1 parent a5d61d2 commit 5cdb625
Show file tree
Hide file tree
Showing 37 changed files with 1,471 additions and 87 deletions.
7 changes: 7 additions & 0 deletions pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions pkg/sql/catalog/colinfo/col_type_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func ColumnTypeIsIndexable(t *types.T) bool {
// using an inverted index.
func ColumnTypeIsInvertedIndexable(t *types.T) bool {
switch t.Family() {
case types.ArrayFamily, types.StringFamily:
case types.JsonFamily, types.ArrayFamily, types.StringFamily:
return true
}
return ColumnTypeIsOnlyInvertedIndexable(t)
Expand All @@ -162,7 +162,6 @@ func ColumnTypeIsOnlyInvertedIndexable(t *types.T) bool {
t = t.ArrayContents()
}
switch t.Family() {
case types.JsonFamily:
case types.GeographyFamily:
case types.GeometryFamily:
case types.TSVectorFamily:
Expand All @@ -183,7 +182,7 @@ func MustBeValueEncoded(semanticType *types.T) bool {
default:
return MustBeValueEncoded(semanticType.ArrayContents())
}
case types.JsonFamily, types.TupleFamily, types.GeographyFamily, types.GeometryFamily:
case types.TupleFamily, types.GeographyFamily, types.GeometryFamily:
return true
case types.TSVectorFamily, types.TSQueryFamily:
return true
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/catalog/colinfo/column_type_properties.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func CanHaveCompositeKeyEncoding(typ *types.T) bool {
switch typ.Family() {
case types.FloatFamily,
types.DecimalFamily,
types.JsonFamily,
types.CollatedStringFamily:
return true
case types.ArrayFamily:
Expand All @@ -75,7 +76,6 @@ func CanHaveCompositeKeyEncoding(typ *types.T) bool {
types.UuidFamily,
types.INetFamily,
types.TimeFamily,
types.JsonFamily,
types.TimeTZFamily,
types.BitFamily,
types.GeometryFamily,
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/catalog/colinfo/column_type_properties_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func TestCanHaveCompositeKeyEncoding(t *testing.T) {
{types.IntArray, false},
{types.Interval, false},
{types.IntervalArray, false},
{types.Jsonb, false},
{types.Jsonb, true},
{types.Name, false},
{types.Oid, false},
{types.String, false},
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/catalog/table_col_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ func (s TableColSet) ForEach(f func(col descpb.ColumnID)) {
s.set.ForEach(func(i int) { f(descpb.ColumnID(i)) })
}

// Copy returns a new TableColSet built from a copy of s's underlying set, so
// the result can be modified independently of s.
func (s TableColSet) Copy() TableColSet {
	dup := s.set.Copy()
	return TableColSet{set: dup}
}

// SubsetOf returns true if s is a subset of other.
func (s TableColSet) SubsetOf(other TableColSet) bool {
return s.set.SubsetOf(other.set)
Expand Down
12 changes: 10 additions & 2 deletions pkg/sql/catalog/tabledesc/structured.go
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,14 @@ func (desc *Mutable) allocateIndexIDs(columnNames map[string]descpb.ColumnID) er
colIDs = idx.CollectKeyColumnIDs()
}

// Inverted indexes don't store composite values in the individual
// paths present. The composite values will be encoded in
// the primary index itself.
compositeColIDsLocal := compositeColIDs.Copy()
if isInverted {
compositeColIDsLocal.Remove(invID)
}

// StoreColumnIDs are derived from StoreColumnNames just like KeyColumnIDs
// derives from KeyColumnNames.
// For primary indexes this set of columns is typically defined as the set
Expand Down Expand Up @@ -755,12 +763,12 @@ func (desc *Mutable) allocateIndexIDs(columnNames map[string]descpb.ColumnID) er
// or in the primary key whose type has a composite encoding, like DECIMAL
// for instance.
for _, colID := range idx.IndexDesc().KeyColumnIDs {
if compositeColIDs.Contains(colID) {
if compositeColIDsLocal.Contains(colID) {
idx.IndexDesc().CompositeColumnIDs = append(idx.IndexDesc().CompositeColumnIDs, colID)
}
}
for _, colID := range idx.IndexDesc().KeySuffixColumnIDs {
if compositeColIDs.Contains(colID) {
if compositeColIDsLocal.Contains(colID) {
idx.IndexDesc().CompositeColumnIDs = append(idx.IndexDesc().CompositeColumnIDs, colID)
}
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/colenc/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,9 @@ func isComposite(vec coldata.Vec, row int) bool {
case types.DecimalFamily:
d := tree.DDecimal{Decimal: vec.Decimal()[row]}
return d.IsComposite()
case types.JsonFamily:
j := tree.DJSON{JSON: vec.JSON().Get(row)}
return j.IsComposite()
default:
d := vec.Datum().Get(row)
if cdatum, ok := d.(tree.CompositeDatum); ok {
Expand Down
15 changes: 15 additions & 0 deletions pkg/sql/colenc/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,21 @@ func encodeKeys[T []byte | roachpb.Key](
}
kys[r] = b
}
case types.JsonFamily:
jsonVector := vec.JSON()
for r := 0; r < count; r++ {
b := kys[r]
if partialIndexAndNullCheck(kys, r, start, nulls, dir) {
continue
}
var err error
jsonObj := jsonVector.Get(r + start)
b, err = jsonObj.EncodeForwardIndex(b, dir)
if err != nil {
return err
}
kys[r] = b
}
default:
if buildutil.CrdbTestBuild {
if typeconv.TypeFamilyToCanonicalTypeFamily(typ.Family()) != typeconv.DatumVecCanonicalTypeFamily {
Expand Down
21 changes: 15 additions & 6 deletions pkg/sql/colencoding/key_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,21 @@ func decodeTableKeyToCol(
}
vecs.IntervalCols[colIdx][rowIdx] = d
case types.JsonFamily:
// Don't attempt to decode the JSON value. Instead, just return the
// remaining bytes of the key.
var jsonLen int
jsonLen, err = encoding.PeekLength(key)
vecs.JSONCols[colIdx].Bytes.Set(rowIdx, key[:jsonLen])
rkey = key[jsonLen:]
// Decode the JSON, and then store the bytes in the
// vector in the value-encoded format.
// TODO (shivam): Make it possible for the vector to store
// key-encoded JSONs instead of value-encoded JSONs.
var d tree.Datum
encDir := encoding.Ascending
if dir == catenumpb.IndexColumn_DESC {
encDir = encoding.Descending
}
d, rkey, err = keyside.Decode(da, valType, key, encDir)
json, ok := d.(*tree.DJSON)
if !ok {
return nil, false, scratch, errors.AssertionFailedf("Could not type assert into DJSON")
}
vecs.JSONCols[colIdx].Set(rowIdx, json.JSON)
case types.EncodedKeyFamily:
// Don't attempt to decode the inverted key.
keyLen, err := encoding.PeekLength(key)
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -1257,7 +1257,7 @@ ORDER BY
statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {b} 3 0 true
s {j} 3 0 false
s {j} 3 0 true
s {rowid} 3 0 true

# Test that non-index columns have histograms collected for them, with
Expand Down Expand Up @@ -2348,7 +2348,7 @@ SHOW STATISTICS USING JSON FOR TABLE j;
statement ok
ALTER TABLE j INJECT STATISTICS '$j_stats'

statement error pq: cannot create partial statistics on an inverted index column
statement error pq: table j does not contain a non-partial forward index with j as a prefix column
CREATE STATISTICS j_partial ON j FROM j USING EXTREMES;

statement ok
Expand Down
Loading

0 comments on commit 5cdb625

Please sign in to comment.